Skip to main content

datasynth_runtime/
output_writer.rs

1//! Comprehensive output writer for all generated data.
2//!
3//! Writes all generated data from the EnhancedGenerationResult to files
4//! in the output directory. Uses CSV for flat tabular data (journal entry
5//! lines) and JSON for types with nested structures (Vecs, sub-structs).
6
7use std::cell::Cell;
8use std::io::Write;
9use std::path::Path;
10
11use crate::enhanced_orchestrator::EnhancedGenerationResult;
12use datasynth_core::documents::PaymentType;
13use datasynth_output::OutputRootConfig;
14use tracing::{info, warn};
15
thread_local! {
    /// Per-thread "flat layout" switch, initially `false`.
    ///
    /// `write_all_output_with_layout` turns it on when the flat export
    /// layout is requested and a drop guard turns it back off when that
    /// function returns. While it is on, `write_json_safe` routes through
    /// `write_json_flat` so nested `{header, lines}` shapes get flattened.
    static FLAT_LAYOUT_ACTIVE: Cell<bool> = const { Cell::new(false) };
}

thread_local! {
    /// Per-thread "skip JSON" switch, initially `false`.
    ///
    /// `write_all_output_with_layout` turns it on when the requested formats
    /// contain no JSON variant and a drop guard resets it on return. While it
    /// is on, `write_json_safe` becomes a no-op, which avoids wrapping every
    /// one of its many call sites in an `if write_json` check.
    static SKIP_JSON: Cell<bool> = const { Cell::new(false) };
}
28
29/// Write a JSON file for any serializable slice. Skips empty slices.
30///
31/// Streams JSON directly to a buffered file writer instead of allocating
32/// the entire JSON string in memory (Phase 3 I/O optimization).
33/// Write a JSON array by streaming one record at a time.
34///
35/// Instead of serializing the entire `&[T]` in one `to_writer_pretty` call
36/// (which builds a massive in-memory serde state for large arrays), this
37/// writes `[\n` + per-record pretty-printed JSON with commas + `\n]`.
38///
39/// For 200K+ records this reduces peak memory and improves write throughput
40/// by avoiding serde's internal buffering of the full array structure.
41fn write_json<T: serde::Serialize>(
42    data: &[T],
43    path: &Path,
44    label: &str,
45) -> Result<(), Box<dyn std::error::Error>> {
46    use std::io::Write;
47
48    if data.is_empty() {
49        return Ok(());
50    }
51
52    let file = std::fs::File::create(path)?;
53    let mut writer = std::io::BufWriter::with_capacity(512 * 1024, file);
54
55    // Stream records one at a time into a JSON array
56    writer.write_all(b"[\n")?;
57    for (i, item) in data.iter().enumerate() {
58        if i > 0 {
59            writer.write_all(b",\n")?;
60        }
61        serde_json::to_writer_pretty(&mut writer, item)?;
62    }
63    writer.write_all(b"\n]\n")?;
64    writer.flush()?;
65
66    info!(
67        "  {} written: {} records -> {}",
68        label,
69        data.len(),
70        path.display()
71    );
72    Ok(())
73}
74
75/// Write journal entry lines as a flat CSV file.
76///
77/// This extracts the key fields from both the header and each line item to
78/// produce a single flat CSV that can be loaded directly into dataframes.
79fn write_journal_entries_csv(
80    result: &EnhancedGenerationResult,
81    output_dir: &Path,
82) -> Result<(), Box<dyn std::error::Error>> {
83    if result.journal_entries.is_empty() {
84        return Ok(());
85    }
86
87    let path = output_dir.join("journal_entries.csv");
88    let file = std::fs::File::create(&path)?;
89    let mut w = std::io::BufWriter::with_capacity(256 * 1024, file);
90
91    // Write header.
92    //
93    // Schema note: each release that widens the schema appends new
94    // columns at the end so existing column-positional consumers keep
95    // working.
96    //   v5.5.1 added:
97    //     is_manual, is_post_close, source_system     (audit / ETL provenance)
98    //     account_description                         (joined from CoA)
99    //     financial_statement_category                (asset/liability/...)
100    //     assignment, value_date, tax_code            (already-populated line fields)
101    //     transaction_id                              (stable per-line id)
102    //   v5.6.0 added (ISO 21378 Audit Data Collection classification):
103    //     account_class, account_class_name           (Level-2 e.g. "A.B" / "Trade Receivables")
104    //     account_sub_class, account_sub_class_name   (Level-3 e.g. "A.B.A" / "Trade Accounts Receivable")
105    //   v5.8.0 added:
106    //     predecessor_line_id                         (UUID v5 of preceding line in document chain;
107    //                                                  populated by document_flow_je_generator for
108    //                                                  P2P / O2C chains, empty for chain heads and
109    //                                                  for purely-GL adjustments)
110    writeln!(
111        w,
112        "document_id,company_code,fiscal_year,fiscal_period,posting_date,document_date,\
113         document_type,currency,exchange_rate,reference,header_text,created_by,source,\
114         business_process,ledger,is_fraud,is_anomaly,\
115         line_number,gl_account,debit_amount,credit_amount,local_amount,\
116         cost_center,profit_center,line_text,\
117         auxiliary_account_number,auxiliary_account_label,lettrage,lettrage_date,\
118         is_manual,is_post_close,source_system,\
119         account_description,financial_statement_category,\
120         assignment,value_date,tax_code,transaction_id,\
121         account_class,account_class_name,account_sub_class,account_sub_class_name,\
122         predecessor_line_id"
123    )?;
124
125    // Build a CoA → (short_description, ISO class, ISO sub-class) lookup.
126    // Empty when no CoA was generated (e.g. some smoke tests); resolution
127    // falls back to the line's already-populated `account_description`
128    // and to empty ISO codes.
129    let coa_index: std::collections::HashMap<&str, (&str, &str, &str, &str, &str)> = result
130        .chart_of_accounts
131        .accounts
132        .iter()
133        .map(|a| {
134            (
135                a.account_number.as_str(),
136                (
137                    a.short_description.as_str(),
138                    a.account_class.as_str(),
139                    a.account_class_name.as_str(),
140                    a.account_sub_class.as_str(),
141                    a.account_sub_class_name.as_str(),
142                ),
143            )
144        })
145        .collect();
146
147    for je in &result.journal_entries {
148        let h = &je.header;
149        for line in &je.lines {
150            let lettrage_date_str = line
151                .lettrage_date
152                .map(|d| d.to_string())
153                .unwrap_or_default();
154            let value_date_str = line.value_date.map(|d| d.to_string()).unwrap_or_default();
155            // Look up CoA-joined fields in one shot.
156            let coa_hit = coa_index.get(line.gl_account.as_str()).copied();
157            let coa_short_desc = coa_hit.map(|t| t.0).unwrap_or("");
158            let coa_class = coa_hit.map(|t| t.1).unwrap_or("");
159            let coa_class_name = coa_hit.map(|t| t.2).unwrap_or("");
160            let coa_sub_class = coa_hit.map(|t| t.3).unwrap_or("");
161            let coa_sub_class_name = coa_hit.map(|t| t.4).unwrap_or("");
162            // Prefer the line's own account_description; fall back to the CoA
163            // lookup so consumers always get a name even when the generator
164            // forgot to populate the field.
165            let account_description: &str = line
166                .account_description
167                .as_deref()
168                .filter(|s| !s.is_empty())
169                .unwrap_or(coa_short_desc);
170            // Derive the FSA category from the gl_account prefix (1xxx=asset,
171            // 2xxx=liability, ...). Cheap, deterministic, no CoA dependency.
172            let fsa_category =
173                datasynth_core::accounts::AccountCategory::from_account(line.gl_account.as_str())
174                    .as_label();
175            // Stable per-line identifier (UUID v5 of document_id+line_number).
176            let transaction_id = line.transaction_id.clone().unwrap_or_else(|| {
177                datasynth_core::models::JournalEntryLine::derive_transaction_id(
178                    line.document_id,
179                    line.line_number,
180                )
181            });
182            writeln!(
183                w,
184                "{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}",
185                h.document_id,
186                csv_escape(&h.company_code),
187                h.fiscal_year,
188                h.fiscal_period,
189                h.posting_date,
190                h.document_date,
191                csv_escape(&h.document_type),
192                csv_escape(&h.currency),
193                h.exchange_rate,
194                csv_opt_str(&h.reference),
195                csv_opt_str(&h.header_text),
196                csv_escape(&h.created_by),
197                h.source,
198                h.business_process
199                    .map(|bp| format!("{bp:?}"))
200                    .unwrap_or_default(),
201                csv_escape(&h.ledger),
202                h.is_fraud,
203                h.is_anomaly,
204                line.line_number,
205                csv_escape(&line.gl_account),
206                line.debit_amount,
207                line.credit_amount,
208                line.local_amount,
209                csv_opt_str(&line.cost_center),
210                csv_opt_str(&line.profit_center),
211                csv_opt_str(&line.line_text),
212                csv_opt_str(&line.auxiliary_account_number),
213                csv_opt_str(&line.auxiliary_account_label),
214                csv_opt_str(&line.lettrage),
215                lettrage_date_str,
216                h.is_manual,
217                h.is_post_close,
218                csv_escape(&h.source_system),
219                csv_escape(account_description),
220                fsa_category,
221                csv_opt_str(&line.assignment),
222                value_date_str,
223                csv_opt_str(&line.tax_code),
224                csv_escape(&transaction_id),
225                csv_escape(coa_class),
226                csv_escape(coa_class_name),
227                csv_escape(coa_sub_class),
228                csv_escape(coa_sub_class_name),
229                csv_opt_str(&line.predecessor_line_id),
230            )?;
231        }
232    }
233
234    w.flush()?;
235    let total_lines: usize = result.journal_entries.iter().map(|je| je.lines.len()).sum();
236    info!(
237        "  Journal entries CSV written: {} entries, {} line items -> {}",
238        result.journal_entries.len(),
239        total_lines,
240        path.display()
241    );
242    Ok(())
243}
244
245/// **v5.8.0** — write `graphs/je_network.csv`: a flat edge-list of the
246/// accounting network derived from journal entries.
247///
248/// Each row represents one debit↔credit flow within a single JE,
249/// formed via the cartesian product of debit lines × credit lines (the
250/// approach in `datasynth-graph::TransactionGraphBuilder`). For a
251/// 2-line JE this is exactly the bijective Method-A flow from
252/// Ivertowski et al. (2024); for larger JEs it is a Method-B/C
253/// approximation with proportional amount allocation.
254///
255/// Joins back to `journal_entries.csv` via:
256///   - `document_id` → JE-level header
257///   - `from_line_id` / `to_line_id` → per-line `transaction_id`
258///   - `predecessor_edge_id` → previous flow in a document chain
259fn write_je_network_csv(
260    result: &EnhancedGenerationResult,
261    output_dir: &Path,
262    method: datasynth_config::JeNetworkMethod,
263) -> Result<(), Box<dyn std::error::Error>> {
264    use rust_decimal::Decimal;
265
266    if result.journal_entries.is_empty() {
267        return Ok(());
268    }
269    let graphs_dir = output_dir.join("graphs");
270    std::fs::create_dir_all(&graphs_dir)?;
271    let path = graphs_dir.join("je_network.csv");
272    let file = std::fs::File::create(&path)?;
273    let mut w = std::io::BufWriter::with_capacity(256 * 1024, file);
274
275    writeln!(
276        w,
277        "edge_id,document_id,posting_date,from_account,to_account,\
278         from_line_id,to_line_id,amount,confidence,\
279         predecessor_edge_id,business_process,is_fraud,is_anomaly"
280    )?;
281
282    // Build a map line_transaction_id → edge_id of the FIRST out-going
283    // edge from that line so predecessor_edge_id can be resolved without
284    // a second pass: if a curr-line's predecessor_line_id points at
285    // some prev-line's transaction_id, the predecessor edge is the
286    // first edge in that prev JE that has this line as its credit
287    // ("from") side. This reasonably maps each line to one canonical
288    // outgoing edge.
289    //
290    // We populate the map during the first pass (write the rows) and
291    // resolve predecessor_edge_id with the existing entry — JEs are
292    // emitted in chain order, so the predecessor will already be in
293    // the map by the time we look it up.
294    let mut line_id_to_edge_id: std::collections::HashMap<String, String> =
295        std::collections::HashMap::with_capacity(result.journal_entries.len() * 2);
296
297    let mut total_edges: usize = 0;
298
299    for je in &result.journal_entries {
300        let h = &je.header;
301        // Pre-compute transaction_ids for every line (so we can pair
302        // them without re-deriving inside the inner loop).
303        let line_ids: Vec<String> = je
304            .lines
305            .iter()
306            .map(|l| {
307                l.transaction_id.clone().unwrap_or_else(|| {
308                    datasynth_core::models::JournalEntryLine::derive_transaction_id(
309                        l.document_id,
310                        l.line_number,
311                    )
312                })
313            })
314            .collect();
315
316        let debits: Vec<usize> = je
317            .lines
318            .iter()
319            .enumerate()
320            .filter(|(_, l)| l.debit_amount > Decimal::ZERO)
321            .map(|(i, _)| i)
322            .collect();
323        let credits: Vec<usize> = je
324            .lines
325            .iter()
326            .enumerate()
327            .filter(|(_, l)| l.credit_amount > Decimal::ZERO)
328            .map(|(i, _)| i)
329            .collect();
330        if debits.is_empty() || credits.is_empty() {
331            continue;
332        }
333
334        // Method A: bijective on 2-line entries only.  Multi-line JEs
335        // are skipped under this method — see Ivertowski (2024)
336        // Methods A through E.  The full Cartesian product of a
337        // multi-line consolidation produces O(n × m) edges per JE,
338        // which dominates total dataset size at scale; users who need
339        // the multi-line edges should set
340        // `graph_export.je_network.method: cartesian` (the default).
341        if method == datasynth_config::JeNetworkMethod::A
342            && !(debits.len() == 1 && credits.len() == 1)
343        {
344            continue;
345        }
346
347        let total_debit: Decimal = debits.iter().map(|i| je.lines[*i].debit_amount).sum();
348        let total_credit: Decimal = credits.iter().map(|i| je.lines[*i].credit_amount).sum();
349        if total_debit.is_zero() || total_credit.is_zero() {
350            continue;
351        }
352
353        // Confidence per the paper: bijective on 2-line entries
354        // (Method A), 1/(n*m) approximation otherwise.
355        let confidence: f64 = if debits.len() == 1 && credits.len() == 1 {
356            1.0
357        } else {
358            1.0 / (debits.len() * credits.len()) as f64
359        };
360
361        let bp = h
362            .business_process
363            .map(|bp| format!("{bp:?}"))
364            .unwrap_or_default();
365        let posting_date = h.posting_date.to_string();
366        let doc_id = h.document_id.to_string();
367
368        for &di in &debits {
369            let debit_line = &je.lines[di];
370            let to_line_id = &line_ids[di];
371            for &ci in &credits {
372                let credit_line = &je.lines[ci];
373                let from_line_id = &line_ids[ci];
374
375                // Edge id = UUID v5 of (document_id, debit.line_number,
376                // credit.line_number). Stable across regenerations.
377                let mut input = Vec::with_capacity(16 + 8);
378                input.extend_from_slice(h.document_id.as_bytes());
379                input.extend_from_slice(&debit_line.line_number.to_le_bytes());
380                input.extend_from_slice(&credit_line.line_number.to_le_bytes());
381                let edge_id = uuid::Uuid::new_v5(&uuid::Uuid::NAMESPACE_OID, &input).to_string();
382
383                // Proportional allocation of the flow's amount (matches
384                // the existing TransactionGraphBuilder::add_journal_entry
385                // _debit_credit formula).
386                let proportion = (debit_line.debit_amount / total_debit)
387                    * (credit_line.credit_amount / total_credit);
388                let amount = debit_line.debit_amount * proportion;
389
390                // Resolve predecessor: the predecessor_line_id from the
391                // CREDIT side (it represents the "from" of the next
392                // edge); fall back to the debit side. Either points at
393                // a transaction_id whose first outgoing edge id is the
394                // predecessor edge.
395                let predecessor_edge_id: String = credit_line
396                    .predecessor_line_id
397                    .as_ref()
398                    .or(debit_line.predecessor_line_id.as_ref())
399                    .and_then(|tx_id| line_id_to_edge_id.get(tx_id).cloned())
400                    .unwrap_or_default();
401
402                writeln!(
403                    w,
404                    "{},{},{},{},{},{},{},{},{},{},{},{},{}",
405                    csv_escape(&edge_id),
406                    csv_escape(&doc_id),
407                    csv_escape(&posting_date),
408                    csv_escape(&credit_line.gl_account),
409                    csv_escape(&debit_line.gl_account),
410                    csv_escape(from_line_id),
411                    csv_escape(to_line_id),
412                    amount,
413                    confidence,
414                    csv_escape(&predecessor_edge_id),
415                    csv_escape(&bp),
416                    h.is_fraud,
417                    h.is_anomaly,
418                )?;
419
420                // Map the credit line ("from" end) to its first edge —
421                // this is the canonical outgoing edge subsequent JEs
422                // can refer to as their predecessor.
423                line_id_to_edge_id
424                    .entry(from_line_id.clone())
425                    .or_insert_with(|| edge_id.clone());
426                total_edges += 1;
427            }
428        }
429    }
430
431    w.flush()?;
432    info!(
433        "  JE network CSV written: {} edges from {} entries -> {}",
434        total_edges,
435        result.journal_entries.len(),
436        path.display()
437    );
438    Ok(())
439}
440
441/// Write journal entries as flat JSON (header fields merged onto each line).
442///
443/// Each object in the output array contains all header fields plus all line fields,
444/// with no nesting. This is the analytics-friendly format.
445fn write_journal_entries_flat_json(
446    result: &EnhancedGenerationResult,
447    output_dir: &Path,
448) -> Result<(), Box<dyn std::error::Error>> {
449    if result.journal_entries.is_empty() {
450        return Ok(());
451    }
452
453    let path = output_dir.join("journal_entries.json");
454    let file = std::fs::File::create(&path)?;
455    let mut writer = std::io::BufWriter::with_capacity(256 * 1024, file);
456
457    // Write opening bracket
458    writer.write_all(b"[\n")?;
459
460    let mut first = true;
461    let mut total_lines = 0usize;
462    for je in &result.journal_entries {
463        // Serialize header to a JSON map
464        let header_value = serde_json::to_value(&je.header)?;
465
466        for line in &je.lines {
467            if !first {
468                writer.write_all(b",\n")?;
469            }
470            first = false;
471            total_lines += 1;
472
473            // Serialize line to a JSON map, then merge header fields in
474            let mut line_value = serde_json::to_value(line)?;
475
476            if let serde_json::Value::Object(ref header_map) = header_value {
477                if let serde_json::Value::Object(ref mut line_map) = line_value {
478                    for (key, val) in header_map {
479                        // Line fields take precedence for shared keys (e.g. document_id)
480                        if !line_map.contains_key(key) {
481                            line_map.insert(key.clone(), val.clone());
482                        }
483                    }
484                }
485            }
486
487            serde_json::to_writer_pretty(&mut writer, &line_value)?;
488        }
489    }
490
491    writer.write_all(b"\n]\n")?;
492    writer.flush()?;
493    info!(
494        "  Journal entries (flat JSON) written: {} line items -> {}",
495        total_lines,
496        path.display()
497    );
498    Ok(())
499}
500
501/// v4.4.2 helper — walk a serialized OCEL event-log `Value` tree and
502/// mirror `object_type_id` into `object_type` on every
503/// `object_refs[*]` entry. The canonical OCEL 2.0 field name is
504/// `object_type`; DataSynth's internal model carries it as
505/// `object_type_id` for historical reasons. Emitting both keys lets
506/// OCEL-spec-compliant consumers (pm4py, Celonis, etc.) see the type
507/// without a rename step.
508fn add_ocel_object_type_alias(value: &mut serde_json::Value) {
509    if let Some(events) = value.get_mut("events").and_then(|v| v.as_array_mut()) {
510        for event in events.iter_mut() {
511            if let Some(refs) = event.get_mut("object_refs").and_then(|r| r.as_array_mut()) {
512                for oref in refs.iter_mut() {
513                    if let Some(obj) = oref.as_object_mut() {
514                        if let Some(oti) = obj.get("object_type_id").cloned() {
515                            obj.entry("object_type").or_insert(oti);
516                        }
517                    }
518                }
519            }
520        }
521    }
522}
523
/// Escape a string so it can be written as a single CSV field.
///
/// A value containing a comma, a double quote, a line feed or a carriage
/// return is wrapped in double quotes, with embedded double quotes doubled
/// (`"` becomes `""`). Any other value, including the empty string, is
/// returned unchanged.
///
/// Carriage returns are included because an unquoted `\r` is treated as a
/// record break by line-oriented CSV readers, which would split the row.
fn csv_escape(s: &str) -> String {
    let needs_quoting = s.contains(|c: char| matches!(c, ',' | '"' | '\n' | '\r'));
    if needs_quoting {
        format!("\"{}\"", s.replace('"', "\"\""))
    } else {
        s.to_string()
    }
}
532
533/// Format an Option<String> for CSV output (empty string for None).
534fn csv_opt_str(opt: &Option<String>) -> String {
535    match opt {
536        Some(s) => csv_escape(s),
537        None => String::new(),
538    }
539}
540
541/// Write all generated data to the output directory.
542///
543/// This function exports every non-empty dataset from the generation result.
544/// Journal entries are written as a flat CSV file (one row per line item)
545/// and as a nested JSON file. Other data is written as JSON files since
546/// many model types contain nested structures.
547#[allow(dead_code)]
548pub fn write_all_output(
549    result: &EnhancedGenerationResult,
550    output_dir: &Path,
551) -> Result<(), Box<dyn std::error::Error>> {
552    write_all_output_with_layout(
553        result,
554        output_dir,
555        datasynth_config::ExportLayout::Nested,
556        &[
557            datasynth_config::FileFormat::Csv,
558            datasynth_config::FileFormat::Json,
559        ],
560        datasynth_config::JeNetworkMethod::default(),
561    )
562}
563
564/// Variant of [`write_all_output_with_layout`] that routes output through
565/// an [`OutputRootConfig`] instead of a raw `&Path`.
566///
567/// Per-entity subtree mode is used by the group-audit shard runner
568/// (v5.0+): the runner sets `per_entity_subtree: true` and
569/// `entity_code: Some(code)` on `root`, and this helper drops each
570/// entity's archive under `{root_dir}/entities/{code}/` so group-wide
571/// artifacts can still live at `{root_dir}/`.
572///
573/// In flat mode (the default for single-entity runs) this is exactly
574/// equivalent to calling [`write_all_output_with_layout`] with
575/// `output_dir = root.root_dir`, so the signature and behavior of the
576/// existing single-entity entrypoints are unchanged.
577#[allow(dead_code)]
578pub fn write_all_output_with_root(
579    result: &EnhancedGenerationResult,
580    root: &OutputRootConfig,
581    export_layout: datasynth_config::ExportLayout,
582    formats: &[datasynth_config::FileFormat],
583) -> Result<(), Box<dyn std::error::Error>> {
584    let effective = root.effective_dir();
585    write_all_output_with_layout(
586        result,
587        &effective,
588        export_layout,
589        formats,
590        datasynth_config::JeNetworkMethod::default(),
591    )
592}
593
594/// Write all generated data with a configurable export layout and format set.
595///
596/// Only writes files for formats present in `formats`. If `formats` is empty,
597/// writes both CSV and JSON (backward compatible). This allows skipping JSON
598/// when only CSV is needed, which halves output time for large datasets.
599pub fn write_all_output_with_layout(
600    result: &EnhancedGenerationResult,
601    output_dir: &Path,
602    export_layout: datasynth_config::ExportLayout,
603    formats: &[datasynth_config::FileFormat],
604    je_network_method: datasynth_config::JeNetworkMethod,
605) -> Result<(), Box<dyn std::error::Error>> {
606    let csv_enabled = formats.is_empty()
607        || formats.contains(&datasynth_config::FileFormat::Csv)
608        || formats.contains(&datasynth_config::FileFormat::Parquet);
609    let json_enabled = formats.is_empty()
610        || formats.contains(&datasynth_config::FileFormat::Json)
611        || formats.contains(&datasynth_config::FileFormat::JsonLines);
612    std::fs::create_dir_all(output_dir)?;
613    info!("Writing comprehensive output to: {}", output_dir.display());
614
615    // Set flat-layout flag for all `write_json_safe` calls in this pass.
616    // Scope guard ensures we reset on return (including error paths).
617    struct FlatLayoutGuard;
618    impl Drop for FlatLayoutGuard {
619        fn drop(&mut self) {
620            FLAT_LAYOUT_ACTIVE.with(|c| c.set(false));
621        }
622    }
623    let _flat_guard = if export_layout == datasynth_config::ExportLayout::Flat {
624        FLAT_LAYOUT_ACTIVE.with(|c| c.set(true));
625        Some(FlatLayoutGuard)
626    } else {
627        None
628    };
629
630    // Set JSON skip flag so `write_json_safe` becomes a no-op when JSON not requested.
631    struct SkipJsonGuard;
632    impl Drop for SkipJsonGuard {
633        fn drop(&mut self) {
634            SKIP_JSON.with(|c| c.set(false));
635        }
636    }
637    let _skip_json_guard = if !json_enabled {
638        SKIP_JSON.with(|c| c.set(true));
639        info!("JSON output skipped (not in requested formats)");
640        Some(SkipJsonGuard)
641    } else {
642        None
643    };
644
645    // ========================================================================
646    // Journal Entries (CSV + JSON in parallel when both enabled)
647    // ========================================================================
648    if !result.journal_entries.is_empty() {
649        let do_csv = csv_enabled;
650        let do_json = json_enabled;
651        let is_flat = export_layout == datasynth_config::ExportLayout::Flat;
652
653        std::thread::scope(|s| {
654            if do_csv {
655                s.spawn(|| {
656                    if let Err(e) = write_journal_entries_csv(result, output_dir) {
657                        warn!("Failed to write journal_entries.csv: {}", e);
658                    }
659                });
660                // v5.8.0 — flat edge-list for accounting-network construction.
661                // Always emit when CSV is requested; cheap relative to the
662                // main JE table.
663                s.spawn(|| {
664                    if let Err(e) = write_je_network_csv(result, output_dir, je_network_method) {
665                        warn!("Failed to write graphs/je_network.csv: {}", e);
666                    }
667                });
668            }
669            if do_json {
670                s.spawn(|| {
671                    if is_flat {
672                        if let Err(e) = write_journal_entries_flat_json(result, output_dir) {
673                            warn!("Failed to write flat journal_entries.json: {}", e);
674                        }
675                    } else if let Err(e) = write_json(
676                        &result.journal_entries,
677                        &output_dir.join("journal_entries.json"),
678                        "Journal entries (JSON)",
679                    ) {
680                        warn!("Failed to write journal_entries.json: {}", e);
681                    }
682                });
683            }
684        });
685    }
686
687    // ========================================================================
688    // Master Data
689    // ========================================================================
690    let md_dir = output_dir.join("master_data");
691    if !result.master_data.vendors.is_empty()
692        || !result.master_data.customers.is_empty()
693        || !result.master_data.materials.is_empty()
694        || !result.master_data.assets.is_empty()
695        || !result.master_data.employees.is_empty()
696        || !result.master_data.cost_centers.is_empty()
697        || !result.master_data.profit_centers.is_empty()
698    {
699        std::fs::create_dir_all(&md_dir)?;
700        info!("Writing master data...");
701
702        write_json_safe(
703            &result.master_data.vendors,
704            &md_dir.join("vendors.json"),
705            "Vendors",
706        );
707        write_json_safe(
708            &result.master_data.customers,
709            &md_dir.join("customers.json"),
710            "Customers",
711        );
712        write_json_safe(
713            &result.master_data.materials,
714            &md_dir.join("materials.json"),
715            "Materials",
716        );
717        write_json_safe(
718            &result.master_data.assets,
719            &md_dir.join("fixed_assets.json"),
720            "Fixed assets",
721        );
722        write_json_safe(
723            &result.master_data.employees,
724            &md_dir.join("employees.json"),
725            "Employees",
726        );
727        write_json_safe(
728            &result.master_data.cost_centers,
729            &md_dir.join("cost_centers.json"),
730            "Cost centers",
731        );
732        // v5.1: profit-centre hierarchy (segments + sub-units).
733        write_json_safe(
734            &result.master_data.profit_centers,
735            &md_dir.join("profit_centers.json"),
736            "Profit centres",
737        );
738        // v3.3.0: organizational profiles (one per company)
739        write_json_safe(
740            &result.master_data.organizational_profiles,
741            &md_dir.join("organizational_profiles.json"),
742            "Organizational profiles (v3.3.0)",
743        );
744    }
745
746    // ========================================================================
747    // Document Flows
748    // ========================================================================
749    let df_dir = output_dir.join("document_flows");
750    let flat_mode = export_layout == datasynth_config::ExportLayout::Flat;
751    if !result.document_flows.purchase_orders.is_empty()
752        || !result.document_flows.sales_orders.is_empty()
753    {
754        std::fs::create_dir_all(&df_dir)?;
755        info!("Writing document flows...");
756
757        write_json_auto(
758            &result.document_flows.purchase_orders,
759            &df_dir.join("purchase_orders.json"),
760            "Purchase orders",
761            flat_mode,
762        );
763        write_json_auto(
764            &result.document_flows.goods_receipts,
765            &df_dir.join("goods_receipts.json"),
766            "Goods receipts",
767            flat_mode,
768        );
769        write_json_auto(
770            &result.document_flows.vendor_invoices,
771            &df_dir.join("vendor_invoices.json"),
772            "Vendor invoices",
773            flat_mode,
774        );
775        write_json_auto(
776            &result.document_flows.payments,
777            &df_dir.join("payments.json"),
778            "Payments",
779            flat_mode,
780        );
781        let customer_receipts: Vec<_> = result
782            .document_flows
783            .payments
784            .iter()
785            .filter(|p| p.payment_type == PaymentType::ArReceipt)
786            .collect();
787        write_json_auto(
788            &customer_receipts,
789            &df_dir.join("customer_receipts.json"),
790            "Customer receipts",
791            flat_mode,
792        );
793        write_json_auto(
794            &result.document_flows.sales_orders,
795            &df_dir.join("sales_orders.json"),
796            "Sales orders",
797            flat_mode,
798        );
799        write_json_auto(
800            &result.document_flows.deliveries,
801            &df_dir.join("deliveries.json"),
802            "Deliveries",
803            flat_mode,
804        );
805        write_json_auto(
806            &result.document_flows.customer_invoices,
807            &df_dir.join("customer_invoices.json"),
808            "Customer invoices",
809            flat_mode,
810        );
811
812        // Document cross-references (PO→GR, GR→Invoice, Invoice→Payment, etc.).
813        // v4.4.2+: inject SDK-friendly `from_type`/`from_id`/`to_type`/`to_id`
814        // aliases so consumers that follow the graph convention see the
815        // types populated. The canonical `source_doc_*`/`target_doc_*`
816        // keys are still emitted unchanged for backwards compatibility.
817        match serde_json::to_value(&result.document_flows.document_references) {
818            Ok(mut v) => {
819                if let Some(arr) = v.as_array_mut() {
820                    for r in arr.iter_mut() {
821                        if let Some(obj) = r.as_object_mut() {
822                            if let Some(st) = obj.get("source_doc_type").cloned() {
823                                obj.entry("from_type").or_insert(st);
824                            }
825                            if let Some(si) = obj.get("source_doc_id").cloned() {
826                                obj.entry("from_id").or_insert(si);
827                            }
828                            if let Some(tt) = obj.get("target_doc_type").cloned() {
829                                obj.entry("to_type").or_insert(tt);
830                            }
831                            if let Some(ti) = obj.get("target_doc_id").cloned() {
832                                obj.entry("to_id").or_insert(ti);
833                            }
834                        }
835                    }
836                }
837                match serde_json::to_string_pretty(&v) {
838                    Ok(json) => {
839                        let path = df_dir.join("document_references.json");
840                        if let Err(e) = std::fs::write(&path, json) {
841                            warn!("Failed to write document references: {}", e);
842                        } else {
843                            info!(
844                                "  Document references written: {} records -> {}",
845                                result.document_flows.document_references.len(),
846                                path.display()
847                            );
848                        }
849                    }
850                    Err(e) => warn!("Failed to serialize document references: {}", e),
851                }
852            }
853            Err(e) => warn!("Failed to build document references Value: {}", e),
854        }
855
856        // Note: P2P/O2C chain types do not implement Serialize, so we log
857        // their counts instead. The individual documents above capture all data.
858        if !result.document_flows.p2p_chains.is_empty() {
859            info!(
860                "  P2P chains: {} (data exported via individual document files)",
861                result.document_flows.p2p_chains.len()
862            );
863        }
864        if !result.document_flows.o2c_chains.is_empty() {
865            info!(
866                "  O2C chains: {} (data exported via individual document files)",
867                result.document_flows.o2c_chains.len()
868            );
869        }
870    }
871
872    // ========================================================================
873    // Subledger
874    // ========================================================================
875    let sl_dir = output_dir.join("subledger");
876    if !result.subledger.ap_invoices.is_empty()
877        || !result.subledger.ar_invoices.is_empty()
878        || !result.subledger.fa_records.is_empty()
879        || !result.subledger.inventory_positions.is_empty()
880    {
881        std::fs::create_dir_all(&sl_dir)?;
882        info!("Writing subledger data...");
883
884        write_json_safe(
885            &result.subledger.ap_invoices,
886            &sl_dir.join("ap_invoices.json"),
887            "AP invoices",
888        );
889        write_json_safe(
890            &result.subledger.ar_invoices,
891            &sl_dir.join("ar_invoices.json"),
892            "AR invoices",
893        );
894        write_json_safe(
895            &result.subledger.fa_records,
896            &sl_dir.join("fa_records.json"),
897            "FA records",
898        );
899        write_json_safe(
900            &result.subledger.inventory_positions,
901            &sl_dir.join("inventory_positions.json"),
902            "Inventory positions",
903        );
904        write_json_safe(
905            &result.subledger.inventory_movements,
906            &sl_dir.join("inventory_movements.json"),
907            "Inventory movements",
908        );
909        write_json_safe(
910            &result.subledger.ar_aging_reports,
911            &sl_dir.join("ar_aging.json"),
912            "AR aging reports",
913        );
914        write_json_safe(
915            &result.subledger.ap_aging_reports,
916            &sl_dir.join("ap_aging.json"),
917            "AP aging reports",
918        );
919        write_json_safe(
920            &result.subledger.depreciation_runs,
921            &sl_dir.join("depreciation_runs.json"),
922            "Depreciation runs",
923        );
924        write_json_safe(
925            &result.subledger.inventory_valuations,
926            &sl_dir.join("inventory_valuation.json"),
927            "Inventory valuations",
928        );
929        // Dunning runs and letters (generated after AR aging)
930        write_json_safe(
931            &result.subledger.dunning_runs,
932            &sl_dir.join("dunning_runs.json"),
933            "Dunning runs",
934        );
935        write_json_safe(
936            &result.subledger.dunning_letters,
937            &sl_dir.join("dunning_letters.json"),
938            "Dunning letters",
939        );
940    }
941
942    // ========================================================================
943    // Audit
944    // ========================================================================
945    let audit_dir = output_dir.join("audit");
946    if !result.audit.engagements.is_empty() {
947        std::fs::create_dir_all(&audit_dir)?;
948        info!("Writing audit data...");
949
950        write_json_safe(
951            &result.audit.engagements,
952            &audit_dir.join("audit_engagements.json"),
953            "Audit engagements",
954        );
955        write_json_safe(
956            &result.audit.audit_scopes,
957            &audit_dir.join("audit_scopes.json"),
958            "Audit scopes (ISA 220 / ISA 300)",
959        );
960        write_json_safe(
961            &result.audit.workpapers,
962            &audit_dir.join("audit_workpapers.json"),
963            "Audit workpapers",
964        );
965        write_json_safe(
966            &result.audit.evidence,
967            &audit_dir.join("audit_evidence.json"),
968            "Audit evidence",
969        );
970        write_json_safe(
971            &result.audit.risk_assessments,
972            &audit_dir.join("audit_risk_assessments.json"),
973            "Audit risk assessments",
974        );
975        write_json_safe(
976            &result.audit.findings,
977            &audit_dir.join("audit_findings.json"),
978            "Audit findings",
979        );
980        write_json_safe(
981            &result.audit.judgments,
982            &audit_dir.join("audit_judgments.json"),
983            "Audit judgments",
984        );
985        write_json_safe(
986            &result.audit.confirmations,
987            &audit_dir.join("audit_confirmations.json"),
988            "Audit confirmations",
989        );
990        write_json_safe(
991            &result.audit.confirmation_responses,
992            &audit_dir.join("audit_confirmation_responses.json"),
993            "Audit confirmation responses",
994        );
995        write_json_safe(
996            &result.audit.procedure_steps,
997            &audit_dir.join("audit_procedure_steps.json"),
998            "Audit procedure steps",
999        );
1000        write_json_safe(
1001            &result.audit.samples,
1002            &audit_dir.join("audit_samples.json"),
1003            "Audit samples",
1004        );
1005        write_json_safe(
1006            &result.audit.analytical_results,
1007            &audit_dir.join("audit_analytical_results.json"),
1008            "Audit analytical results",
1009        );
1010        write_json_safe(
1011            &result.audit.ia_functions,
1012            &audit_dir.join("audit_ia_functions.json"),
1013            "Audit IA functions",
1014        );
1015        write_json_safe(
1016            &result.audit.ia_reports,
1017            &audit_dir.join("audit_ia_reports.json"),
1018            "Audit IA reports",
1019        );
1020        write_json_safe(
1021            &result.audit.related_parties,
1022            &audit_dir.join("audit_related_parties.json"),
1023            "Audit related parties",
1024        );
1025        write_json_safe(
1026            &result.audit.related_party_transactions,
1027            &audit_dir.join("audit_related_party_transactions.json"),
1028            "Audit related party transactions",
1029        );
1030        // ISA 600: Group audit artefacts
1031        if !result.audit.component_auditors.is_empty() {
1032            write_json_safe(
1033                &result.audit.component_auditors,
1034                &audit_dir.join("component_auditors.json"),
1035                "Component auditors (ISA 600)",
1036            );
1037            if let Some(plan) = &result.audit.group_audit_plan {
1038                write_json_single_safe(
1039                    plan,
1040                    &audit_dir.join("group_audit_plan.json"),
1041                    "Group audit plan (ISA 600)",
1042                );
1043            }
1044            write_json_safe(
1045                &result.audit.component_instructions,
1046                &audit_dir.join("component_instructions.json"),
1047                "Component instructions (ISA 600)",
1048            );
1049            write_json_safe(
1050                &result.audit.component_reports,
1051                &audit_dir.join("component_reports.json"),
1052                "Component auditor reports (ISA 600)",
1053            );
1054        }
1055        // ISA 210: Engagement letters
1056        write_json_safe(
1057            &result.audit.engagement_letters,
1058            &audit_dir.join("engagement_letters.json"),
1059            "Engagement letters (ISA 210)",
1060        );
1061        // ISA 560 / IAS 10: Subsequent events
1062        write_json_safe(
1063            &result.audit.subsequent_events,
1064            &audit_dir.join("subsequent_events.json"),
1065            "Subsequent events (ISA 560 / IAS 10)",
1066        );
1067        // ISA 402: Service organization controls
1068        write_json_safe(
1069            &result.audit.service_organizations,
1070            &audit_dir.join("service_organizations.json"),
1071            "Service organizations (ISA 402)",
1072        );
1073        write_json_safe(
1074            &result.audit.soc_reports,
1075            &audit_dir.join("soc_reports.json"),
1076            "SOC reports (ISA 402)",
1077        );
1078        write_json_safe(
1079            &result.audit.user_entity_controls,
1080            &audit_dir.join("user_entity_controls.json"),
1081            "User entity controls (ISA 402)",
1082        );
1083
1084        // ISA 570: Going concern assessments
1085        write_json_safe(
1086            &result.audit.going_concern_assessments,
1087            &audit_dir.join("going_concern_assessments.json"),
1088            "Going concern assessments (ISA 570)",
1089        );
1090
1091        // ISA 540: Accounting estimates
1092        write_json_safe(
1093            &result.audit.accounting_estimates,
1094            &audit_dir.join("accounting_estimates.json"),
1095            "Accounting estimates (ISA 540)",
1096        );
1097
1098        // ISA 700/701/705/706: Audit opinions and Key Audit Matters.
1099        // Always write even if the vec is empty; see the always-emit block in
1100        // the `else` branch of the `engagements.is_empty()` check for the case
1101        // where audit is entirely disabled — the files still appear in the
1102        // archive with `[]` so SDK consumers don't get 404s on the manifest.
1103        write_json_always(
1104            &result.audit.audit_opinions,
1105            &audit_dir.join("audit_opinions.json"),
1106            "Audit opinions (ISA 700/705/706)",
1107        );
1108        write_json_always(
1109            &result.audit.key_audit_matters,
1110            &audit_dir.join("key_audit_matters.json"),
1111            "Key Audit Matters (ISA 701)",
1112        );
1113
1114        // SOX 302 / 404
1115        if !result.audit.sox_302_certifications.is_empty() {
1116            write_json_safe(
1117                &result.audit.sox_302_certifications,
1118                &audit_dir.join("sox_302_certifications.json"),
1119                "SOX 302 certifications",
1120            );
1121            write_json_safe(
1122                &result.audit.sox_404_assessments,
1123                &audit_dir.join("sox_404_assessments.json"),
1124                "SOX 404 ICFR assessments",
1125            );
1126        }
1127
1128        // ISA 320: Materiality calculations
1129        if !result.audit.materiality_calculations.is_empty() {
1130            write_json_safe(
1131                &result.audit.materiality_calculations,
1132                &audit_dir.join("materiality_calculations.json"),
1133                "Materiality calculations (ISA 320)",
1134            );
1135        }
1136
1137        // ISA 315: Combined Risk Assessments
1138        if !result.audit.combined_risk_assessments.is_empty() {
1139            write_json_safe(
1140                &result.audit.combined_risk_assessments,
1141                &audit_dir.join("combined_risk_assessments.json"),
1142                "Combined Risk Assessments (ISA 315)",
1143            );
1144        }
1145
1146        // ISA 530: Sampling Plans and Sampled Items
1147        if !result.audit.sampling_plans.is_empty() {
1148            write_json_safe(
1149                &result.audit.sampling_plans,
1150                &audit_dir.join("sampling_plans.json"),
1151                "Sampling plans (ISA 530)",
1152            );
1153            write_json_safe(
1154                &result.audit.sampled_items,
1155                &audit_dir.join("sampled_items.json"),
1156                "Sampled items (ISA 530)",
1157            );
1158        }
1159
1160        // ISA 315: Significant Classes of Transactions (SCOTS)
1161        if !result.audit.significant_transaction_classes.is_empty() {
1162            write_json_safe(
1163                &result.audit.significant_transaction_classes,
1164                &audit_dir.join("significant_transaction_classes.json"),
1165                "Significant Classes of Transactions / SCOTS (ISA 315)",
1166            );
1167        }
1168
1169        // ISA 520: Unusual Item Markers
1170        if !result.audit.unusual_items.is_empty() {
1171            write_json_safe(
1172                &result.audit.unusual_items,
1173                &audit_dir.join("unusual_items.json"),
1174                "Unusual item flags (ISA 520)",
1175            );
1176        }
1177
1178        // ISA 520: Analytical Relationships
1179        if !result.audit.analytical_relationships.is_empty() {
1180            write_json_safe(
1181                &result.audit.analytical_relationships,
1182                &audit_dir.join("analytical_relationships.json"),
1183                "Analytical relationships (ISA 520)",
1184            );
1185        }
1186
1187        // PCAOB-ISA cross-reference mappings
1188        if !result.audit.isa_pcaob_mappings.is_empty() {
1189            write_json_safe(
1190                &result.audit.isa_pcaob_mappings,
1191                &audit_dir.join("isa_pcaob_mappings.json"),
1192                "PCAOB-ISA standard mappings",
1193            );
1194        }
1195
1196        // ISA standard reference (number, title, series for all 34 ISA standards)
1197        if !result.audit.isa_mappings.is_empty() {
1198            write_json_safe(
1199                &result.audit.isa_mappings,
1200                &audit_dir.join("isa_mappings.json"),
1201                "ISA standard reference mappings",
1202            );
1203        }
1204
1205        // FSM event trail (when audit.fsm.enabled: true)
1206        if let Some(ref event_trail) = result.audit.fsm_event_trail {
1207            if !event_trail.is_empty() {
1208                write_json_safe(
1209                    event_trail,
1210                    &audit_dir.join("fsm_event_trail.json"),
1211                    "FSM audit event trail",
1212                );
1213            }
1214        }
1215
1216        // v3.3.0: legal documents (when compliance_regulations.legal_documents.enabled)
1217        write_json_safe(
1218            &result.audit.legal_documents,
1219            &audit_dir.join("legal_documents.json"),
1220            "Legal documents (v3.3.0)",
1221        );
1222
1223        // v3.3.0: IT general controls — access logs + change records
1224        write_json_safe(
1225            &result.audit.it_controls_access_logs,
1226            &audit_dir.join("it_controls_access_logs.json"),
1227            "IT general controls — access logs (v3.3.0)",
1228        );
1229        write_json_safe(
1230            &result.audit.it_controls_change_records,
1231            &audit_dir.join("it_controls_change_records.json"),
1232            "IT general controls — change management records (v3.3.0)",
1233        );
1234    } else {
1235        // Audit phase disabled or ran with no engagements — still emit
1236        // audit_opinions.json + key_audit_matters.json so the archive
1237        // structure is consistent and SDK consumers can rely on these
1238        // files always existing. v3.1 announced these as archive-shipping
1239        // files; v3.1.1 guarantees it regardless of audit.enabled.
1240        std::fs::create_dir_all(&audit_dir)?;
1241        write_json_always(
1242            &result.audit.audit_opinions,
1243            &audit_dir.join("audit_opinions.json"),
1244            "Audit opinions (ISA 700/705/706) — empty (audit phase disabled)",
1245        );
1246        write_json_always(
1247            &result.audit.key_audit_matters,
1248            &audit_dir.join("key_audit_matters.json"),
1249            "Key Audit Matters (ISA 701) — empty (audit phase disabled)",
1250        );
1251    }
1252
1253    // ========================================================================
1254    // Banking (JSON - keep existing format for backward compat)
1255    // ========================================================================
1256    let banking_dir = output_dir.join("banking");
1257    if !result.banking.customers.is_empty() {
1258        std::fs::create_dir_all(&banking_dir)?;
1259        info!("Writing banking data...");
1260
1261        // v4.4.2: dual-key risk tier. SDK consumers inspect `risk_level`;
1262        // the struct stores it as `risk_tier` for historical reasons.
1263        // Serialize through a `serde_json::Value` so we can inject the
1264        // `risk_level` alias key on every customer row without touching
1265        // the `BankingCustomer` Serialize impl (which has 40+ fields).
1266        match serde_json::to_value(&result.banking.customers) {
1267            Ok(mut v) => {
1268                if let Some(arr) = v.as_array_mut() {
1269                    for c in arr.iter_mut() {
1270                        if let Some(obj) = c.as_object_mut() {
1271                            if let Some(rt) = obj.get("risk_tier").cloned() {
1272                                obj.entry("risk_level").or_insert(rt);
1273                            }
1274                        }
1275                    }
1276                }
1277                match serde_json::to_string_pretty(&v) {
1278                    Ok(json) => {
1279                        let path = banking_dir.join("banking_customers.json");
1280                        if let Err(e) = std::fs::write(&path, json) {
1281                            warn!("Failed to write banking_customers.json: {}", e);
1282                        } else {
1283                            info!(
1284                                "  Banking customers written: {} records -> {}",
1285                                result.banking.customers.len(),
1286                                path.display()
1287                            );
1288                        }
1289                    }
1290                    Err(e) => warn!("Failed to serialize banking customers: {}", e),
1291                }
1292            }
1293            Err(e) => warn!("Failed to build banking customers Value: {}", e),
1294        }
1295        write_json_safe(
1296            &result.banking.accounts,
1297            &banking_dir.join("banking_accounts.json"),
1298            "Banking accounts",
1299        );
1300        write_json_safe(
1301            &result.banking.transactions,
1302            &banking_dir.join("banking_transactions.json"),
1303            "Banking transactions",
1304        );
1305        write_json_safe(
1306            &result.banking.transaction_labels,
1307            &banking_dir.join("aml_transaction_labels.json"),
1308            "AML transaction labels",
1309        );
1310        write_json_safe(
1311            &result.banking.customer_labels,
1312            &banking_dir.join("aml_customer_labels.json"),
1313            "AML customer labels",
1314        );
1315        write_json_safe(
1316            &result.banking.account_labels,
1317            &banking_dir.join("aml_account_labels.json"),
1318            "AML account labels",
1319        );
1320        write_json_safe(
1321            &result.banking.relationship_labels,
1322            &banking_dir.join("aml_relationship_labels.json"),
1323            "AML relationship labels",
1324        );
1325        write_json_safe(
1326            &result.banking.narratives,
1327            &banking_dir.join("aml_narratives.json"),
1328            "AML narratives",
1329        );
1330    }
1331
1332    // ========================================================================
1333    // Sourcing (S2C)
1334    // ========================================================================
1335    let s2c_dir = output_dir.join("sourcing");
1336    if !result.sourcing.spend_analyses.is_empty() || !result.sourcing.sourcing_projects.is_empty() {
1337        std::fs::create_dir_all(&s2c_dir)?;
1338        info!("Writing sourcing (S2C) data...");
1339
1340        write_json_safe(
1341            &result.sourcing.spend_analyses,
1342            &s2c_dir.join("spend_analyses.json"),
1343            "Spend analyses",
1344        );
1345        write_json_safe(
1346            &result.sourcing.sourcing_projects,
1347            &s2c_dir.join("sourcing_projects.json"),
1348            "Sourcing projects",
1349        );
1350        write_json_safe(
1351            &result.sourcing.qualifications,
1352            &s2c_dir.join("supplier_qualifications.json"),
1353            "Supplier qualifications",
1354        );
1355        write_json_safe(
1356            &result.sourcing.rfx_events,
1357            &s2c_dir.join("rfx_events.json"),
1358            "RFx events",
1359        );
1360        write_json_safe(
1361            &result.sourcing.bids,
1362            &s2c_dir.join("supplier_bids.json"),
1363            "Supplier bids",
1364        );
1365        write_json_safe(
1366            &result.sourcing.bid_evaluations,
1367            &s2c_dir.join("bid_evaluations.json"),
1368            "Bid evaluations",
1369        );
1370        write_json_safe(
1371            &result.sourcing.contracts,
1372            &s2c_dir.join("procurement_contracts.json"),
1373            "Procurement contracts",
1374        );
1375        write_json_safe(
1376            &result.sourcing.catalog_items,
1377            &s2c_dir.join("catalog_items.json"),
1378            "Catalog items",
1379        );
1380        write_json_safe(
1381            &result.sourcing.scorecards,
1382            &s2c_dir.join("supplier_scorecards.json"),
1383            "Supplier scorecards",
1384        );
1385    }
1386
1387    // ========================================================================
1388    // Intercompany
1389    // ========================================================================
1390    let ic_dir = output_dir.join("intercompany");
1391    if result.intercompany.group_structure.is_some()
1392        || !result.intercompany.matched_pairs.is_empty()
1393    {
1394        std::fs::create_dir_all(&ic_dir)?;
1395        info!("Writing intercompany data...");
1396
1397        // Always write group structure when present (independent of IC transactions).
1398        if let Some(gs) = &result.intercompany.group_structure {
1399            write_json_single_safe(gs, &ic_dir.join("group_structure.json"), "Group structure");
1400        }
1401
1402        write_json_safe(
1403            &result.intercompany.matched_pairs,
1404            &ic_dir.join("ic_matched_pairs.json"),
1405            "IC matched pairs",
1406        );
1407        write_json_safe(
1408            &result.intercompany.seller_journal_entries,
1409            &ic_dir.join("ic_seller_journal_entries.json"),
1410            "IC seller journal entries",
1411        );
1412        write_json_safe(
1413            &result.intercompany.buyer_journal_entries,
1414            &ic_dir.join("ic_buyer_journal_entries.json"),
1415            "IC buyer journal entries",
1416        );
1417        write_json_safe(
1418            &result.intercompany.elimination_entries,
1419            &ic_dir.join("ic_elimination_entries.json"),
1420            "IC elimination entries",
1421        );
1422
1423        // NCI measurements from group structure ownership percentages
1424        if !result.intercompany.nci_measurements.is_empty() {
1425            write_json_safe(
1426                &result.intercompany.nci_measurements,
1427                &ic_dir.join("nci_measurements.json"),
1428                "NCI measurements",
1429            );
1430        }
1431    }
1432
1433    // ========================================================================
1434    // Financial Reporting
1435    // ========================================================================
1436    let fin_dir = output_dir.join("financial_reporting");
1437    if !result.financial_reporting.financial_statements.is_empty()
1438        || !result.financial_reporting.bank_reconciliations.is_empty()
1439        || !result
1440            .financial_reporting
1441            .consolidated_statements
1442            .is_empty()
1443    {
1444        std::fs::create_dir_all(&fin_dir)?;
1445        info!("Writing financial reporting data...");
1446
1447        // Legacy flat file (all standalone statements combined)
1448        write_json_safe(
1449            &result.financial_reporting.financial_statements,
1450            &fin_dir.join("financial_statements.json"),
1451            "Financial statements",
1452        );
1453
1454        // Per-entity standalone statements
1455        if !result.financial_reporting.standalone_statements.is_empty() {
1456            let standalone_dir = fin_dir.join("standalone");
1457            std::fs::create_dir_all(&standalone_dir)?;
1458            for (entity_code, stmts) in &result.financial_reporting.standalone_statements {
1459                let file_name = format!("{}_financial_statements.json", entity_code);
1460                write_json_safe(
1461                    stmts,
1462                    &standalone_dir.join(&file_name),
1463                    &format!("Standalone statements for {}", entity_code),
1464                );
1465            }
1466        }
1467
1468        // Consolidated statements + schedule
1469        if !result
1470            .financial_reporting
1471            .consolidated_statements
1472            .is_empty()
1473            || !result
1474                .financial_reporting
1475                .consolidation_schedules
1476                .is_empty()
1477        {
1478            let consolidated_dir = fin_dir.join("consolidated");
1479            std::fs::create_dir_all(&consolidated_dir)?;
1480            write_json_safe(
1481                &result.financial_reporting.consolidated_statements,
1482                &consolidated_dir.join("consolidated_financial_statements.json"),
1483                "Consolidated financial statements",
1484            );
1485            write_json_safe(
1486                &result.financial_reporting.consolidation_schedules,
1487                &consolidated_dir.join("consolidation_schedule.json"),
1488                "Consolidation schedule",
1489            );
1490        }
1491
1492        write_json_safe(
1493            &result.financial_reporting.bank_reconciliations,
1494            &fin_dir.join("bank_reconciliations.json"),
1495            "Bank reconciliations",
1496        );
1497
1498        // IFRS 8 / ASC 280 Segment Reporting
1499        if !result.financial_reporting.segment_reports.is_empty()
1500            || !result
1501                .financial_reporting
1502                .segment_reconciliations
1503                .is_empty()
1504        {
1505            let seg_dir = fin_dir.join("segment_reporting");
1506            std::fs::create_dir_all(&seg_dir)?;
1507            write_json_safe(
1508                &result.financial_reporting.segment_reports,
1509                &seg_dir.join("segment_reports.json"),
1510                "Segment reports",
1511            );
1512            write_json_safe(
1513                &result.financial_reporting.segment_reconciliations,
1514                &seg_dir.join("segment_reconciliations.json"),
1515                "Segment reconciliations",
1516            );
1517        }
1518
1519        // IAS 1 / ASC 235: Notes to financial statements
1520        write_json_safe(
1521            &result.financial_reporting.notes_to_financial_statements,
1522            &fin_dir.join("notes_to_financial_statements.json"),
1523            "Notes to financial statements",
1524        );
1525    }
1526
1527    // ========================================================================
1528    // Period-Close Trial Balances
1529    // ========================================================================
1530    //
1531    // v5.1: convert each in-memory `PeriodTrialBalance` to the
1532    // canonical `datasynth_core::models::balance::TrialBalance` before
1533    // writing.  The on-disk shape is now identical to what the group
1534    // aggregate phase loads via `tb_loader::load_entity_trial_balance`,
1535    // so the loader's v5.0 dual-shape detection (`PeriodTrialBalanceOnDisk`
1536    // → `TrialBalance` synthesis) is no longer required.
1537    if !result.financial_reporting.trial_balances.is_empty() {
1538        let pc_dir = output_dir.join("period_close");
1539        std::fs::create_dir_all(&pc_dir)?;
1540        info!(
1541            "Writing {} period-close trial balances...",
1542            result.financial_reporting.trial_balances.len()
1543        );
1544        // Pick the first JE's company_code + currency as the
1545        // canonical identifiers; the orchestrator only emits one TB
1546        // per period (gated by `if company_idx == 0` at the push
1547        // site), so all trial-balance entries belong to that company.
1548        // Fall back to safe defaults when the JE list is empty
1549        // (effectively only test fixtures).
1550        let (company_code, currency) = result
1551            .journal_entries
1552            .first()
1553            .map(|je| (je.header.company_code.as_str(), je.header.currency.as_str()))
1554            .unwrap_or(("UNKNOWN", "USD"));
1555        let canonical: Vec<datasynth_core::models::balance::TrialBalance> = result
1556            .financial_reporting
1557            .trial_balances
1558            .iter()
1559            .cloned()
1560            .map(|tb| tb.into_canonical(company_code, currency))
1561            .collect();
1562        write_json_safe(
1563            &canonical,
1564            &pc_dir.join("trial_balances.json"),
1565            "Period-close trial balances (canonical)",
1566        );
1567    }
1568
1569    // ========================================================================
1570    // Balance: Opening Balances + GL-Subledger Reconciliation
1571    // ========================================================================
1572    if !result.opening_balances.is_empty() || !result.subledger_reconciliation.is_empty() {
1573        let balance_dir = output_dir.join("balance");
1574        std::fs::create_dir_all(&balance_dir)?;
1575        info!("Writing balance data...");
1576
1577        write_json_safe(
1578            &result.opening_balances,
1579            &balance_dir.join("opening_balances.json"),
1580            "Opening balances",
1581        );
1582        write_json_safe(
1583            &result.subledger_reconciliation,
1584            &balance_dir.join("subledger_reconciliation.json"),
1585            "Subledger reconciliation",
1586        );
1587    }
1588
1589    // ========================================================================
1590    // HR (Payroll, Time Entries, Expense Reports, Benefit Enrollments, Pensions)
1591    // ========================================================================
1592    let hr_dir = output_dir.join("hr");
1593    if !result.hr.payroll_runs.is_empty()
1594        || !result.hr.time_entries.is_empty()
1595        || !result.hr.expense_reports.is_empty()
1596        || !result.hr.benefit_enrollments.is_empty()
1597        || !result.hr.pension_plans.is_empty()
1598        || !result.hr.stock_grants.is_empty()
1599        || !result.master_data.employee_change_history.is_empty()
1600    {
1601        std::fs::create_dir_all(&hr_dir)?;
1602        info!("Writing HR data...");
1603
1604        write_json_safe(
1605            &result.hr.payroll_runs,
1606            &hr_dir.join("payroll_runs.json"),
1607            "Payroll runs",
1608        );
1609        write_json_safe(
1610            &result.hr.payroll_line_items,
1611            &hr_dir.join("payroll_line_items.json"),
1612            "Payroll line items",
1613        );
1614        write_json_safe(
1615            &result.hr.time_entries,
1616            &hr_dir.join("time_entries.json"),
1617            "Time entries",
1618        );
1619        write_json_safe(
1620            &result.hr.expense_reports,
1621            &hr_dir.join("expense_reports.json"),
1622            "Expense reports",
1623        );
1624        write_json_safe(
1625            &result.hr.benefit_enrollments,
1626            &hr_dir.join("benefit_enrollments.json"),
1627            "Benefit enrollments",
1628        );
1629        write_json_safe(
1630            &result.hr.pension_plans,
1631            &hr_dir.join("pension_plans.json"),
1632            "Pension plans",
1633        );
1634        write_json_safe(
1635            &result.hr.pension_obligations,
1636            &hr_dir.join("pension_obligations.json"),
1637            "Pension obligations",
1638        );
1639        write_json_safe(
1640            &result.hr.pension_plan_assets,
1641            &hr_dir.join("plan_assets.json"),
1642            "Plan assets",
1643        );
1644        write_json_safe(
1645            &result.hr.pension_disclosures,
1646            &hr_dir.join("pension_disclosures.json"),
1647            "Pension disclosures",
1648        );
1649        write_json_safe(
1650            &result.hr.stock_grants,
1651            &hr_dir.join("stock_grants.json"),
1652            "Stock grants",
1653        );
1654        write_json_safe(
1655            &result.hr.stock_comp_expenses,
1656            &hr_dir.join("stock_comp_expense.json"),
1657            "Stock comp expense",
1658        );
1659        write_json_safe(
1660            &result.master_data.employee_change_history,
1661            &hr_dir.join("employee_change_history.json"),
1662            "Employee change history",
1663        );
1664    }
1665
1666    // ========================================================================
1667    // Manufacturing
1668    // ========================================================================
1669    let mfg_dir = output_dir.join("manufacturing");
1670    if !result.manufacturing.production_orders.is_empty()
1671        || !result.manufacturing.quality_inspections.is_empty()
1672        || !result.manufacturing.cycle_counts.is_empty()
1673        || !result.manufacturing.bom_components.is_empty()
1674        || !result.manufacturing.inventory_movements.is_empty()
1675    {
1676        std::fs::create_dir_all(&mfg_dir)?;
1677        info!("Writing manufacturing data...");
1678
1679        write_json_safe(
1680            &result.manufacturing.production_orders,
1681            &mfg_dir.join("production_orders.json"),
1682            "Production orders",
1683        );
1684        write_json_safe(
1685            &result.manufacturing.quality_inspections,
1686            &mfg_dir.join("quality_inspections.json"),
1687            "Quality inspections",
1688        );
1689        write_json_safe(
1690            &result.manufacturing.cycle_counts,
1691            &mfg_dir.join("cycle_counts.json"),
1692            "Cycle counts",
1693        );
1694        write_json_safe(
1695            &result.manufacturing.bom_components,
1696            &mfg_dir.join("bom_components.json"),
1697            "BOM components",
1698        );
1699        write_json_safe(
1700            &result.manufacturing.inventory_movements,
1701            &mfg_dir.join("inventory_movements.json"),
1702            "Inventory movements",
1703        );
1704    }
1705
1706    // ========================================================================
1707    // Sales, KPIs, Budgets
1708    // ========================================================================
1709    let sales_dir = output_dir.join("sales_kpi_budgets");
1710    if !result.sales_kpi_budgets.sales_quotes.is_empty()
1711        || !result.sales_kpi_budgets.kpis.is_empty()
1712        || !result.sales_kpi_budgets.budgets.is_empty()
1713    {
1714        std::fs::create_dir_all(&sales_dir)?;
1715        info!("Writing sales, KPI, and budget data...");
1716
1717        write_json_safe(
1718            &result.sales_kpi_budgets.sales_quotes,
1719            &sales_dir.join("sales_quotes.json"),
1720            "Sales quotes",
1721        );
1722        write_json_safe(
1723            &result.sales_kpi_budgets.kpis,
1724            &sales_dir.join("management_kpis.json"),
1725            "Management KPIs",
1726        );
1727        write_json_safe(
1728            &result.sales_kpi_budgets.budgets,
1729            &sales_dir.join("budgets.json"),
1730            "Budgets",
1731        );
1732    }
1733
1734    // ========================================================================
1735    // Tax
1736    // ========================================================================
1737    let tax_dir = output_dir.join("tax");
1738    if !result.tax.jurisdictions.is_empty()
1739        || !result.tax.codes.is_empty()
1740        || !result.tax.tax_provisions.is_empty()
1741    {
1742        std::fs::create_dir_all(&tax_dir)?;
1743        info!("Writing tax data...");
1744
1745        write_json_safe(
1746            &result.tax.jurisdictions,
1747            &tax_dir.join("tax_jurisdictions.json"),
1748            "Tax jurisdictions",
1749        );
1750        write_json_safe(
1751            &result.tax.codes,
1752            &tax_dir.join("tax_codes.json"),
1753            "Tax codes",
1754        );
1755        write_json_safe(
1756            &result.tax.tax_provisions,
1757            &tax_dir.join("tax_provisions.json"),
1758            "Tax provisions",
1759        );
1760        write_json_safe(
1761            &result.tax.tax_lines,
1762            &tax_dir.join("tax_lines.json"),
1763            "Tax lines",
1764        );
1765        write_json_safe(
1766            &result.tax.tax_returns,
1767            &tax_dir.join("tax_returns.json"),
1768            "Tax returns",
1769        );
1770        write_json_safe(
1771            &result.tax.withholding_records,
1772            &tax_dir.join("withholding_records.json"),
1773            "Withholding tax records",
1774        );
1775        if !result.tax.tax_anomaly_labels.is_empty() {
1776            write_json_safe(
1777                &result.tax.tax_anomaly_labels,
1778                &tax_dir.join("tax_anomaly_labels.json"),
1779                "Tax anomaly labels",
1780            );
1781        }
1782        // Deferred tax engine output (IAS 12 / ASC 740)
1783        if !result.tax.deferred_tax.temporary_differences.is_empty() {
1784            write_json_safe(
1785                &result.tax.deferred_tax.temporary_differences,
1786                &tax_dir.join("temporary_differences.json"),
1787                "Temporary differences",
1788            );
1789            write_json_safe(
1790                &result.tax.deferred_tax.etr_reconciliations,
1791                &tax_dir.join("etr_reconciliation.json"),
1792                "ETR reconciliation",
1793            );
1794            write_json_safe(
1795                &result.tax.deferred_tax.rollforwards,
1796                &tax_dir.join("deferred_tax_rollforward.json"),
1797                "Deferred tax rollforward",
1798            );
1799            write_json_safe(
1800                &result.tax.deferred_tax.journal_entries,
1801                &tax_dir.join("deferred_tax_journal_entries.json"),
1802                "Deferred tax journal entries",
1803            );
1804        }
1805    }
1806
1807    // ========================================================================
1808    // ESG
1809    // ========================================================================
1810    let esg_dir = output_dir.join("esg");
1811    if !result.esg.emissions.is_empty()
1812        || !result.esg.energy.is_empty()
1813        || !result.esg.diversity.is_empty()
1814        || !result.esg.governance.is_empty()
1815    {
1816        std::fs::create_dir_all(&esg_dir)?;
1817        info!("Writing ESG data...");
1818
1819        write_json_safe(
1820            &result.esg.emissions,
1821            &esg_dir.join("emission_records.json"),
1822            "Emission records",
1823        );
1824        write_json_safe(
1825            &result.esg.energy,
1826            &esg_dir.join("energy_consumption.json"),
1827            "Energy consumption",
1828        );
1829        write_json_safe(
1830            &result.esg.water,
1831            &esg_dir.join("water_usage.json"),
1832            "Water usage",
1833        );
1834        write_json_safe(
1835            &result.esg.waste,
1836            &esg_dir.join("waste_records.json"),
1837            "Waste records",
1838        );
1839        write_json_safe(
1840            &result.esg.diversity,
1841            &esg_dir.join("workforce_diversity.json"),
1842            "Workforce diversity",
1843        );
1844        write_json_safe(
1845            &result.esg.pay_equity,
1846            &esg_dir.join("pay_equity.json"),
1847            "Pay equity",
1848        );
1849        write_json_safe(
1850            &result.esg.safety_incidents,
1851            &esg_dir.join("safety_incidents.json"),
1852            "Safety incidents",
1853        );
1854        write_json_safe(
1855            &result.esg.safety_metrics,
1856            &esg_dir.join("safety_metrics.json"),
1857            "Safety metrics",
1858        );
1859        write_json_safe(
1860            &result.esg.governance,
1861            &esg_dir.join("governance_metrics.json"),
1862            "Governance metrics",
1863        );
1864        write_json_safe(
1865            &result.esg.supplier_assessments,
1866            &esg_dir.join("supplier_esg_assessments.json"),
1867            "Supplier ESG assessments",
1868        );
1869        write_json_safe(
1870            &result.esg.materiality,
1871            &esg_dir.join("materiality_assessments.json"),
1872            "Materiality assessments",
1873        );
1874        write_json_safe(
1875            &result.esg.disclosures,
1876            &esg_dir.join("esg_disclosures.json"),
1877            "ESG disclosures",
1878        );
1879        write_json_safe(
1880            &result.esg.climate_scenarios,
1881            &esg_dir.join("climate_scenarios.json"),
1882            "Climate scenarios",
1883        );
1884        write_json_safe(
1885            &result.esg.anomaly_labels,
1886            &esg_dir.join("esg_anomaly_labels.json"),
1887            "ESG anomaly labels",
1888        );
1889    }
1890
1891    // ========================================================================
1892    // Process Mining (OCPM)
1893    // ========================================================================
1894    if let Some(ref event_log) = result.ocpm.event_log {
1895        if !event_log.events.is_empty() || !event_log.objects.is_empty() {
1896            let pm_dir = output_dir.join("process_mining");
1897            std::fs::create_dir_all(&pm_dir)?;
1898            info!("Writing process mining (OCPM) data...");
1899
1900            // Write the full OCEL 2.0 event log. v4.4.2+ patches every
1901            // `object_refs[*].object_type_id` with a companion
1902            // `object_type` key, matching the OCEL 2.0 spec and SDK
1903            // consumer expectations that previously saw `object_type`
1904            // arrive as null. See `add_ocel_object_type_alias` below.
1905            match serde_json::to_value(event_log) {
1906                Ok(mut v) => {
1907                    add_ocel_object_type_alias(&mut v);
1908                    match serde_json::to_string_pretty(&v) {
1909                        Ok(json) => {
1910                            if let Err(e) = std::fs::write(pm_dir.join("event_log.json"), json) {
1911                                warn!("Failed to write OCPM event log: {}", e);
1912                            } else {
1913                                info!(
1914                                    "  Event log written: {} events, {} objects",
1915                                    result.ocpm.event_count, result.ocpm.object_count
1916                                );
1917                            }
1918                        }
1919                        Err(e) => warn!("Failed to serialize OCPM event log: {}", e),
1920                    }
1921                }
1922                Err(e) => warn!("Failed to build OCPM event log Value: {}", e),
1923            }
1924
1925            // Write events separately for easy consumption
1926            if !event_log.events.is_empty() {
1927                match serde_json::to_string_pretty(&event_log.events) {
1928                    Ok(json) => {
1929                        if let Err(e) = std::fs::write(pm_dir.join("events.json"), json) {
1930                            warn!("Failed to write OCPM events: {}", e);
1931                        } else {
1932                            info!("  Events written: {} records", event_log.events.len());
1933                        }
1934                    }
1935                    Err(e) => warn!("Failed to serialize OCPM events: {}", e),
1936                }
1937            }
1938
1939            // Write objects separately for easy consumption
1940            if !event_log.objects.is_empty() {
1941                let objects: Vec<&_> = event_log.objects.iter().collect();
1942                match serde_json::to_string_pretty(&objects) {
1943                    Ok(json) => {
1944                        if let Err(e) = std::fs::write(pm_dir.join("objects.json"), json) {
1945                            warn!("Failed to write OCPM objects: {}", e);
1946                        } else {
1947                            info!("  Objects written: {} records", event_log.objects.len());
1948                        }
1949                    }
1950                    Err(e) => warn!("Failed to serialize OCPM objects: {}", e),
1951                }
1952            }
1953
1954            // Write process variants if any were computed
1955            if !event_log.variants.is_empty() {
1956                let variants: Vec<&_> = event_log.variants.values().collect();
1957                match serde_json::to_string_pretty(&variants) {
1958                    Ok(json) => {
1959                        if let Err(e) = std::fs::write(pm_dir.join("process_variants.json"), json) {
1960                            warn!("Failed to write process variants: {}", e);
1961                        } else {
1962                            info!(
1963                                "  Process variants written: {} variants",
1964                                event_log.variants.len()
1965                            );
1966                        }
1967                    }
1968                    Err(e) => warn!("Failed to serialize process variants: {}", e),
1969                }
1970            }
1971        }
1972    }
1973
1974    // ========================================================================
1975    // Chart of Accounts
1976    // ========================================================================
1977    // Primary file: flat array of accounts (shape stable since v3.x —
1978    // consumers iterate over it).
1979    match serde_json::to_string_pretty(&result.chart_of_accounts.accounts) {
1980        Ok(json) => {
1981            if let Err(e) = std::fs::write(output_dir.join("chart_of_accounts.json"), json) {
1982                warn!("Failed to write chart of accounts: {}", e);
1983            } else {
1984                info!("  Chart of accounts written");
1985            }
1986        }
1987        Err(e) => warn!("Failed to serialize chart of accounts: {}", e),
1988    }
1989    // v4.4.1 — companion metadata file so SDK consumers can read the
1990    // accounting framework + complexity + ID without having to infer
1991    // them from each account row. The SDK team flagged
1992    // `CoA.accounting_framework` arriving as null in v4.1.x; the field
1993    // didn't exist at all until v4.4.1.
1994    let coa_meta = serde_json::json!({
1995        "coa_id": result.chart_of_accounts.coa_id,
1996        "name": result.chart_of_accounts.name,
1997        "country": result.chart_of_accounts.country,
1998        "industry": result.chart_of_accounts.industry,
1999        "complexity": result.chart_of_accounts.complexity,
2000        "account_format": result.chart_of_accounts.account_format,
2001        "accounting_framework": result.chart_of_accounts.accounting_framework,
2002        "account_count": result.chart_of_accounts.accounts.len(),
2003    });
2004    match serde_json::to_string_pretty(&coa_meta) {
2005        Ok(json) => {
2006            if let Err(e) = std::fs::write(output_dir.join("chart_of_accounts_meta.json"), json) {
2007                warn!("Failed to write CoA metadata: {}", e);
2008            } else {
2009                info!(
2010                    "  Chart of accounts metadata written (accounting_framework: {:?})",
2011                    result.chart_of_accounts.accounting_framework
2012                );
2013            }
2014        }
2015        Err(e) => warn!("Failed to serialize CoA metadata: {}", e),
2016    }
2017
2018    // ========================================================================
2019    // Balance Validation Summary
2020    // ========================================================================
2021    if result.balance_validation.validated {
2022        match serde_json::to_string_pretty(&BalanceValidationSummary::from(
2023            &result.balance_validation,
2024        )) {
2025            Ok(json) => {
2026                if let Err(e) = std::fs::write(output_dir.join("balance_validation.json"), json) {
2027                    warn!("Failed to write balance validation: {}", e);
2028                } else {
2029                    info!("  Balance validation summary written");
2030                }
2031            }
2032            Err(e) => warn!("Failed to serialize balance validation: {}", e),
2033        }
2034    }
2035
2036    // ========================================================================
2037    // Data Quality Statistics (now serializable directly via Serialize derives)
2038    // ========================================================================
2039    {
2040        match serde_json::to_string_pretty(&result.data_quality_stats) {
2041            Ok(json) => {
2042                if let Err(e) = std::fs::write(output_dir.join("data_quality_stats.json"), json) {
2043                    warn!("Failed to write data quality stats: {}", e);
2044                } else {
2045                    info!("  Data quality stats written (full detail)");
2046                }
2047            }
2048            Err(e) => warn!("Failed to serialize data quality stats: {}", e),
2049        }
2050    }
2051
2052    // ========================================================================
2053    // v3.3.0: Analytics-metadata phase outputs (prior year, industry
2054    // benchmarks, management reports, drift events).
2055    // ========================================================================
2056    {
2057        let am = &result.analytics_metadata;
2058        if !am.prior_year_comparatives.is_empty()
2059            || !am.industry_benchmarks.is_empty()
2060            || !am.management_reports.is_empty()
2061            || !am.drift_events.is_empty()
2062        {
2063            let analytics_dir = output_dir.join("analytics");
2064            std::fs::create_dir_all(&analytics_dir)?;
2065            write_json_safe(
2066                &am.prior_year_comparatives,
2067                &analytics_dir.join("prior_year_comparatives.json"),
2068                "Prior-year comparatives (v3.3.0)",
2069            );
2070            write_json_safe(
2071                &am.industry_benchmarks,
2072                &analytics_dir.join("industry_benchmarks.json"),
2073                "Industry benchmarks (v3.3.0)",
2074            );
2075            write_json_safe(
2076                &am.management_reports,
2077                &analytics_dir.join("management_reports.json"),
2078                "Management reports (v3.3.0)",
2079            );
2080            write_json_safe(
2081                &am.drift_events,
2082                &analytics_dir.join("drift_events.json"),
2083                "Drift event labels (v3.3.0)",
2084            );
2085        }
2086    }
2087
2088    // ========================================================================
2089    // Pre-built Analytics (Benford, amount distribution, process variants)
2090    // ========================================================================
2091    {
2092        let analytics_dir = output_dir.join("analytics");
2093
2094        // Collect non-zero amounts from journal entry lines
2095        let amounts: Vec<_> = result
2096            .journal_entries
2097            .iter()
2098            .flat_map(|je| je.lines.iter())
2099            .flat_map(|line| {
2100                let d = (!line.debit_amount.is_zero()).then_some(line.debit_amount);
2101                let c = (!line.credit_amount.is_zero()).then_some(line.credit_amount);
2102                d.into_iter().chain(c)
2103            })
2104            .collect();
2105
2106        if amounts.len() >= 10 {
2107            std::fs::create_dir_all(&analytics_dir)?;
2108            info!("Writing pre-built analytics ({} amounts)...", amounts.len());
2109
2110            // Benford's Law analysis
2111            let benford_analyzer = datasynth_eval::BenfordAnalyzer::default();
2112            match benford_analyzer.analyze(&amounts) {
2113                Ok(ref benford_result) => {
2114                    if let Ok(json) = serde_json::to_string_pretty(benford_result) {
2115                        if let Err(e) =
2116                            std::fs::write(analytics_dir.join("benford_analysis.json"), json)
2117                        {
2118                            warn!("Failed to write Benford analysis: {}", e);
2119                        } else {
2120                            info!(
2121                                "  Benford analysis written (conformity: {:?}, MAD: {:.4})",
2122                                benford_result.conformity, benford_result.mad
2123                            );
2124                        }
2125                    }
2126                }
2127                Err(e) => warn!("Benford analysis skipped: {}", e),
2128            }
2129
2130            // Amount distribution analysis
2131            let amount_analyzer = datasynth_eval::AmountDistributionAnalyzer::new();
2132            match amount_analyzer.analyze(&amounts) {
2133                Ok(ref dist_result) => {
2134                    if let Ok(json) = serde_json::to_string_pretty(dist_result) {
2135                        if let Err(e) =
2136                            std::fs::write(analytics_dir.join("amount_distribution.json"), json)
2137                        {
2138                            warn!("Failed to write amount distribution: {}", e);
2139                        } else {
2140                            info!(
2141                                "  Amount distribution written (skewness: {:.2}, kurtosis: {:.2})",
2142                                dist_result.skewness, dist_result.kurtosis
2143                            );
2144                        }
2145                    }
2146                }
2147                Err(e) => warn!("Amount distribution analysis skipped: {}", e),
2148            }
2149        }
2150
2151        // Process variant summary (from OCPM event log).
2152        //
2153        // v3.1.1 — always emit the file when an event_log exists. When the
2154        // event_log has no pre-computed `variants` map (older OCPM phases
2155        // didn't populate it), derive variants on the fly from the raw
2156        // events so SDK consumers see `analytics/process_variant_summary.json`
2157        // in every archive rather than `null`. Without this, the v3.1
2158        // claim that the file exists was only true when OCPM happened to
2159        // populate its variants map.
2160        if let Some(ref event_log) = result.ocpm.event_log {
2161            std::fs::create_dir_all(&analytics_dir)?;
2162            let variant_data: Vec<datasynth_eval::VariantData> = if !event_log.variants.is_empty() {
2163                event_log
2164                    .variants
2165                    .values()
2166                    .map(|v| datasynth_eval::VariantData {
2167                        variant_id: v.variant_id.clone(),
2168                        case_count: v.frequency as usize,
2169                        is_happy_path: v.is_happy_path,
2170                    })
2171                    .collect()
2172            } else {
2173                // Fallback: derive variants from raw events by case_id.
2174                // Each case's activity sequence (by activity_id) defines a
2175                // variant; cases with the same sequence collapse into one
2176                // variant. Events without a case_id are skipped since they
2177                // can't be grouped into a process instance.
2178                use std::collections::HashMap;
2179                // Key by case_id's string form to avoid pulling the uuid
2180                // crate into the output writer's dependency graph.
2181                let mut per_case: HashMap<String, Vec<String>> = HashMap::new();
2182                for ev in &event_log.events {
2183                    if let Some(case_id) = ev.case_id {
2184                        per_case
2185                            .entry(case_id.to_string())
2186                            .or_default()
2187                            .push(ev.activity_id.clone());
2188                    }
2189                }
2190                let mut variant_counts: HashMap<Vec<String>, usize> = HashMap::new();
2191                for activities in per_case.into_values() {
2192                    *variant_counts.entry(activities).or_insert(0) += 1;
2193                }
2194                // Happy path heuristic: every variant tied for the highest frequency.
2195                let max_count = variant_counts.values().copied().max().unwrap_or(0);
2196                variant_counts
2197                    .into_iter()
2198                    .enumerate()
2199                    .map(|(i, (seq, count))| datasynth_eval::VariantData {
2200                        variant_id: format!("V{i:04}:{}", seq.join("->")),
2201                        case_count: count,
2202                        is_happy_path: count == max_count && max_count > 0,
2203                    })
2204                    .collect()
2205            };
2206
2207            let variant_analyzer = datasynth_eval::VariantAnalyzer::new();
2208            match variant_analyzer.analyze(&variant_data) {
2209                Ok(ref variant_result) => {
2210                    if let Ok(json) = serde_json::to_string_pretty(variant_result) {
2211                        if let Err(e) =
2212                            std::fs::write(analytics_dir.join("process_variant_summary.json"), json)
2213                        {
2214                            warn!("Failed to write variant summary: {}", e);
2215                        } else {
2216                            info!(
2217                                "  Process variant summary written ({} variants, entropy: {:.2})",
2218                                variant_result.variant_count, variant_result.variant_entropy
2219                            );
2220                        }
2221                    }
2222                }
2223                Err(e) => {
2224                    // Even on analyzer error, emit a minimal JSON placeholder
2225                    // so the file always exists in the archive.
2226                    warn!("Variant analysis failed: {}; emitting empty summary", e);
2227                    let placeholder = serde_json::json!({
2228                        "variant_count": 0,
2229                        "variant_entropy": null,
2230                        "happy_path_concentration": null,
2231                        "top_variants": [],
2232                        "passes": false,
2233                        "issues": [format!("analyzer error: {e}")],
2234                    });
2235                    if let Ok(json) = serde_json::to_string_pretty(&placeholder) {
2236                        let _ = std::fs::write(
2237                            analytics_dir.join("process_variant_summary.json"),
2238                            json,
2239                        );
2240                    }
2241                }
2242            }
2243        }
2244
2245        // Banking evaluation (KYC completeness + AML detectability).
2246        // Matches the payload served by /v1/jobs/{id}/analytics so
2247        // archive-mode consumers see the same four files the endpoint returns.
2248        if !result.banking.customers.is_empty() {
2249            use datasynth_core::models::banking::BankingCustomerType;
2250            use datasynth_eval::banking::{
2251                AmlDetectabilityAnalyzer, AmlTransactionData, BankingEvaluation,
2252                KycCompletenessAnalyzer, KycProfileData, TypologyData,
2253            };
2254            use std::collections::HashMap;
2255            std::fs::create_dir_all(&analytics_dir)?;
2256
2257            let kyc_data: Vec<KycProfileData> = result
2258                .banking
2259                .customers
2260                .iter()
2261                .map(|c| KycProfileData {
2262                    profile_id: c.customer_id.to_string(),
2263                    has_name: true,
2264                    has_dob: c.date_of_birth.is_some(),
2265                    has_address: c.address_line1.is_some(),
2266                    has_id_document: c.national_id.is_some() || c.passport_number.is_some(),
2267                    has_risk_rating: true,
2268                    has_beneficial_owner: !c.beneficial_owners.is_empty(),
2269                    is_entity: c.customer_type == BankingCustomerType::Business,
2270                    is_verified: c.kyc_truthful,
2271                })
2272                .collect();
2273
2274            let mut banking_eval = BankingEvaluation::new();
2275            if let Ok(kyc_res) = KycCompletenessAnalyzer::new().analyze(&kyc_data) {
2276                banking_eval.kyc = Some(kyc_res);
2277            }
2278
2279            let suspicious: Vec<&_> = result
2280                .banking
2281                .transactions
2282                .iter()
2283                .filter(|t| t.is_suspicious)
2284                .collect();
2285            if !suspicious.is_empty() {
2286                // Use AmlTypology::canonical_name() so the evaluator's
2287                // exact-string match against EXPECTED_TYPOLOGIES succeeds.
2288                // Prior to v3.1.1 we used `format!("{:?}", r)` (Debug /
2289                // PascalCase) which never matched the lowercase expected
2290                // names and produced "typology_coverage = 0.000" in every
2291                // run regardless of actual typology injection.
2292                let aml_data: Vec<AmlTransactionData> = suspicious
2293                    .iter()
2294                    .map(|t| AmlTransactionData {
2295                        transaction_id: t.transaction_id.to_string(),
2296                        typology: t
2297                            .suspicion_reason
2298                            .as_ref()
2299                            .map(|r| r.canonical_name().to_string())
2300                            .unwrap_or_default(),
2301                        case_id: t.case_id.clone().unwrap_or_default(),
2302                        amount: t.amount.try_into().unwrap_or(0.0),
2303                        is_flagged: t.is_suspicious,
2304                    })
2305                    .collect();
2306
2307                let mut typology_map: HashMap<String, (usize, HashMap<String, bool>)> =
2308                    HashMap::new();
2309                for txn in &aml_data {
2310                    if !txn.typology.is_empty() {
2311                        let entry = typology_map
2312                            .entry(txn.typology.clone())
2313                            .or_insert_with(|| (0, HashMap::new()));
2314                        entry.0 += 1;
2315                        entry.1.insert(txn.case_id.clone(), true);
2316                    }
2317                }
2318                let typology_data: Vec<TypologyData> = typology_map
2319                    .iter()
2320                    .map(|(name, (count, cases))| TypologyData {
2321                        name: name.clone(),
2322                        scenario_count: *count,
2323                        case_ids_consistent: cases.len() <= *count,
2324                    })
2325                    .collect();
2326
2327                if let Ok(aml_res) =
2328                    AmlDetectabilityAnalyzer::new().analyze(&aml_data, &typology_data)
2329                {
2330                    banking_eval.aml = Some(aml_res);
2331                }
2332            }
2333            banking_eval.check_thresholds();
2334
2335            match serde_json::to_string_pretty(&banking_eval) {
2336                Ok(json) => {
2337                    if let Err(e) =
2338                        std::fs::write(analytics_dir.join("banking_evaluation.json"), json)
2339                    {
2340                        warn!("Failed to write banking evaluation: {}", e);
2341                    } else {
2342                        info!(
2343                            "  Banking evaluation written ({} profiles, {} issues, passes={})",
2344                            result.banking.customers.len(),
2345                            banking_eval.issues.len(),
2346                            banking_eval.passes
2347                        );
2348                    }
2349                }
2350                Err(e) => warn!("Failed to serialize banking evaluation: {}", e),
2351            }
2352        }
2353    }
2354
2355    // ========================================================================
2356    // Data Quality Issue Records + Quality Labels
2357    // ========================================================================
2358    if !result.quality_issues.is_empty() {
2359        let labels_dir = output_dir.join("labels");
2360        std::fs::create_dir_all(&labels_dir)?;
2361        info!("Writing data quality issue records...");
2362        write_json_safe(
2363            &result.quality_issues,
2364            &labels_dir.join("quality_issues.json"),
2365            "Data quality issues",
2366        );
2367
2368        // Derive quality_labels.json from quality_issues: maps each QualityIssue
2369        // to a QualityIssueLabel with the corresponding LabeledIssueType and severity.
2370        use datasynth_generators::{
2371            LabeledIssueType, QualityIssueLabel, QualityIssueType, QualityLabels,
2372        };
2373        let mut quality_labels = QualityLabels::with_capacity(result.quality_issues.len());
2374        for issue in &result.quality_issues {
2375            let labeled_type = match issue.issue_type {
2376                QualityIssueType::MissingValue => LabeledIssueType::MissingValue,
2377                QualityIssueType::Typo => LabeledIssueType::Typo,
2378                QualityIssueType::DateFormatVariation
2379                | QualityIssueType::AmountFormatVariation
2380                | QualityIssueType::IdentifierFormatVariation
2381                | QualityIssueType::TextFormatVariation => LabeledIssueType::FormatVariation,
2382                QualityIssueType::ExactDuplicate
2383                | QualityIssueType::NearDuplicate
2384                | QualityIssueType::FuzzyDuplicate => LabeledIssueType::Duplicate,
2385                QualityIssueType::EncodingIssue => LabeledIssueType::EncodingIssue,
2386            };
2387            let mut label = QualityIssueLabel::new(
2388                labeled_type,
2389                issue.record_id.clone(),
2390                issue.field.clone().unwrap_or_else(|| "_record".to_string()),
2391                "data_quality_injector",
2392            );
2393            if let Some(ref orig) = issue.original_value {
2394                label = label.with_original(orig.clone());
2395            }
2396            if let Some(ref modified) = issue.modified_value {
2397                label = label.with_modified(modified.clone());
2398            }
2399            quality_labels.add(label);
2400        }
2401        if let Ok(json) = serde_json::to_string_pretty(&quality_labels) {
2402            if let Err(e) = std::fs::write(labels_dir.join("quality_labels.json"), json.as_bytes())
2403            {
2404                warn!("Failed to write quality labels: {}", e);
2405            } else {
2406                info!(
2407                    "  Quality labels written: {} labels -> labels/quality_labels.json",
2408                    quality_labels.len()
2409                );
2410            }
2411        }
2412    }
2413
2414    // ========================================================================
2415    // Internal Controls
2416    // ========================================================================
2417    if !result.internal_controls.is_empty() || !result.sod_violations.is_empty() {
2418        let ctrl_dir = output_dir.join("internal_controls");
2419        std::fs::create_dir_all(&ctrl_dir)?;
2420        info!("Writing internal controls data...");
2421
2422        write_json_safe(
2423            &result.internal_controls,
2424            &ctrl_dir.join("internal_controls.json"),
2425            "Internal controls",
2426        );
2427        // SoD violations extracted from control-annotated journal entries
2428        write_json_safe(
2429            &result.sod_violations,
2430            &ctrl_dir.join("sod_violations.json"),
2431            "SoD violations",
2432        );
2433
2434        // SoD conflict pairs, SoD rules, control mappings, and COSO control mapping
2435        // are static reference data — export via ControlExporter regardless of whether
2436        // individual violations were generated so the master catalog is always present.
2437        let exporter = datasynth_output::ControlExporter::new(&ctrl_dir);
2438        match exporter.export_standard() {
2439            Ok(summary) => {
2440                info!(
2441                    "  Control master data written: {} controls, {} SoD conflicts, {} SoD rules, {} COSO mappings, {} account mappings",
2442                    summary.controls_count,
2443                    summary.sod_conflicts_count,
2444                    summary.sod_rules_count,
2445                    summary.coso_mappings_count,
2446                    summary.account_mappings_count,
2447                );
2448            }
2449            Err(e) => warn!("Failed to write control master data: {}", e),
2450        }
2451    }
2452
2453    // ========================================================================
2454    // Accounting Standards
2455    // ========================================================================
2456    if !result.accounting_standards.contracts.is_empty()
2457        || !result.accounting_standards.impairment_tests.is_empty()
2458        || !result.accounting_standards.business_combinations.is_empty()
2459        || !result.accounting_standards.ecl_models.is_empty()
2460        || !result.accounting_standards.provisions.is_empty()
2461        || !result
2462            .accounting_standards
2463            .currency_translation_results
2464            .is_empty()
2465    {
2466        let acct_dir = output_dir.join("accounting_standards");
2467        std::fs::create_dir_all(&acct_dir)?;
2468        info!("Writing accounting standards data...");
2469
2470        write_json_safe(
2471            &result.accounting_standards.contracts,
2472            &acct_dir.join("customer_contracts.json"),
2473            "Customer contracts",
2474        );
2475        write_json_safe(
2476            &result.accounting_standards.impairment_tests,
2477            &acct_dir.join("impairment_tests.json"),
2478            "Impairment tests",
2479        );
2480        write_json_safe(
2481            &result.accounting_standards.business_combinations,
2482            &acct_dir.join("business_combinations.json"),
2483            "Business combinations",
2484        );
2485        write_json_safe(
2486            &result
2487                .accounting_standards
2488                .business_combination_journal_entries,
2489            &acct_dir.join("business_combination_journal_entries.json"),
2490            "Business combination journal entries",
2491        );
2492        write_json_safe(
2493            &result.accounting_standards.ecl_models,
2494            &acct_dir.join("ecl_models.json"),
2495            "ECL models",
2496        );
2497        write_json_safe(
2498            &result.accounting_standards.ecl_provision_movements,
2499            &acct_dir.join("ecl_provision_movements.json"),
2500            "ECL provision movements",
2501        );
2502        write_json_safe(
2503            &result.accounting_standards.ecl_journal_entries,
2504            &acct_dir.join("ecl_journal_entries.json"),
2505            "ECL journal entries",
2506        );
2507        write_json_safe(
2508            &result.accounting_standards.provisions,
2509            &acct_dir.join("provisions.json"),
2510            "Provisions (IAS 37 / ASC 450)",
2511        );
2512        write_json_safe(
2513            &result.accounting_standards.provision_movements,
2514            &acct_dir.join("provision_movements.json"),
2515            "Provision movements",
2516        );
2517        write_json_safe(
2518            &result.accounting_standards.contingent_liabilities,
2519            &acct_dir.join("contingent_liabilities.json"),
2520            "Contingent liabilities",
2521        );
2522        write_json_safe(
2523            &result.accounting_standards.provision_journal_entries,
2524            &acct_dir.join("provision_journal_entries.json"),
2525            "Provision journal entries",
2526        );
2527
2528        // IAS 21 — write under accounting_standards/fx/
2529        if !result
2530            .accounting_standards
2531            .currency_translation_results
2532            .is_empty()
2533        {
2534            let fx_dir = acct_dir.join("fx");
2535            std::fs::create_dir_all(&fx_dir)?;
2536            write_json_safe(
2537                &result.accounting_standards.currency_translation_results,
2538                &fx_dir.join("currency_translation_results.json"),
2539                "IAS 21 currency translation results",
2540            );
2541        }
2542
2543        // v3.3.1: Leases (IFRS 16 / ASC 842)
2544        if !result.accounting_standards.leases.is_empty() {
2545            let leases_dir = acct_dir.join("leases");
2546            std::fs::create_dir_all(&leases_dir)?;
2547            write_json_safe(
2548                &result.accounting_standards.leases,
2549                &leases_dir.join("leases.json"),
2550                "Leases (IFRS 16 / ASC 842) — v3.3.1",
2551            );
2552        }
2553
2554        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
2555        if !result
2556            .accounting_standards
2557            .fair_value_measurements
2558            .is_empty()
2559        {
2560            let fv_dir = acct_dir.join("fair_value");
2561            std::fs::create_dir_all(&fv_dir)?;
2562            write_json_safe(
2563                &result.accounting_standards.fair_value_measurements,
2564                &fv_dir.join("fair_value_measurements.json"),
2565                "Fair value measurements (IFRS 13 / ASC 820) — v3.3.1",
2566            );
2567        }
2568
2569        // v3.3.1: Framework reconciliation (dual reporting)
2570        if !result.accounting_standards.framework_differences.is_empty() {
2571            let diff_dir = acct_dir.join("framework_differences");
2572            std::fs::create_dir_all(&diff_dir)?;
2573            write_json_safe(
2574                &result.accounting_standards.framework_differences,
2575                &diff_dir.join("framework_differences.json"),
2576                "Framework differences (US GAAP vs IFRS) — v3.3.1",
2577            );
2578            write_json_safe(
2579                &result.accounting_standards.framework_reconciliations,
2580                &diff_dir.join("framework_reconciliations.json"),
2581                "Per-entity framework reconciliation — v3.3.1",
2582            );
2583        }
2584    }
2585
2586    // ========================================================================
2587    // Quality Gate Results
2588    // ========================================================================
2589    if let Some(ref gate_result) = result.gate_result {
2590        match serde_json::to_string_pretty(gate_result) {
2591            Ok(json) => {
2592                if let Err(e) = std::fs::write(output_dir.join("quality_gate_result.json"), json) {
2593                    warn!("Failed to write quality gate result: {}", e);
2594                } else {
2595                    info!(
2596                        "  Quality gate result written (passed={})",
2597                        gate_result.passed
2598                    );
2599                }
2600            }
2601            Err(e) => warn!("Failed to serialize quality gate result: {}", e),
2602        }
2603    }
2604
2605    // ========================================================================
2606    // Treasury
2607    // ========================================================================
2608    if !result.treasury.debt_instruments.is_empty()
2609        || !result.treasury.cash_positions.is_empty()
2610        || !result.treasury.hedging_instruments.is_empty()
2611    {
2612        let treasury_dir = output_dir.join("treasury");
2613        std::fs::create_dir_all(&treasury_dir)?;
2614        info!("Writing treasury data...");
2615
2616        write_json_safe(
2617            &result.treasury.debt_instruments,
2618            &treasury_dir.join("debt_instruments.json"),
2619            "Debt instruments",
2620        );
2621        write_json_safe(
2622            &result.treasury.hedging_instruments,
2623            &treasury_dir.join("hedging_instruments.json"),
2624            "Hedging instruments",
2625        );
2626        write_json_safe(
2627            &result.treasury.hedge_relationships,
2628            &treasury_dir.join("hedge_relationships.json"),
2629            "Hedge relationships",
2630        );
2631        write_json_safe(
2632            &result.treasury.cash_positions,
2633            &treasury_dir.join("cash_positions.json"),
2634            "Cash positions",
2635        );
2636        write_json_safe(
2637            &result.treasury.cash_forecasts,
2638            &treasury_dir.join("cash_forecasts.json"),
2639            "Cash forecasts",
2640        );
2641        write_json_safe(
2642            &result.treasury.cash_pools,
2643            &treasury_dir.join("cash_pools.json"),
2644            "Cash pools",
2645        );
2646        write_json_safe(
2647            &result.treasury.cash_pool_sweeps,
2648            &treasury_dir.join("cash_pool_sweeps.json"),
2649            "Cash pool sweeps",
2650        );
2651        write_json_safe(
2652            &result.treasury.bank_guarantees,
2653            &treasury_dir.join("bank_guarantees.json"),
2654            "Bank guarantees",
2655        );
2656        write_json_safe(
2657            &result.treasury.netting_runs,
2658            &treasury_dir.join("netting_runs.json"),
2659            "Netting runs",
2660        );
2661        if !result.treasury.treasury_anomaly_labels.is_empty() {
2662            write_json_safe(
2663                &result.treasury.treasury_anomaly_labels,
2664                &treasury_dir.join("treasury_anomaly_labels.json"),
2665                "Treasury anomaly labels",
2666            );
2667        }
2668    }
2669
2670    // ========================================================================
2671    // Project Accounting
2672    // ========================================================================
2673    if !result.project_accounting.projects.is_empty() {
2674        let pa_dir = output_dir.join("project_accounting");
2675        std::fs::create_dir_all(&pa_dir)?;
2676        info!("Writing project accounting data...");
2677
2678        write_json_safe(
2679            &result.project_accounting.projects,
2680            &pa_dir.join("projects.json"),
2681            "Projects",
2682        );
2683        write_json_safe(
2684            &result.project_accounting.cost_lines,
2685            &pa_dir.join("cost_lines.json"),
2686            "Project cost lines",
2687        );
2688        write_json_safe(
2689            &result.project_accounting.revenue_records,
2690            &pa_dir.join("revenue_records.json"),
2691            "Project revenue records",
2692        );
2693        write_json_safe(
2694            &result.project_accounting.earned_value_metrics,
2695            &pa_dir.join("earned_value_metrics.json"),
2696            "Earned value metrics",
2697        );
2698        write_json_safe(
2699            &result.project_accounting.change_orders,
2700            &pa_dir.join("change_orders.json"),
2701            "Change orders",
2702        );
2703        write_json_safe(
2704            &result.project_accounting.milestones,
2705            &pa_dir.join("milestones.json"),
2706            "Project milestones",
2707        );
2708    }
2709
2710    // ========================================================================
2711    // Evolution Events (Process Evolution + Organizational Events)
2712    // ========================================================================
2713    if !result.process_evolution.is_empty()
2714        || !result.organizational_events.is_empty()
2715        || !result.disruption_events.is_empty()
2716    {
2717        let events_dir = output_dir.join("events");
2718        std::fs::create_dir_all(&events_dir)?;
2719        info!("Writing evolution events...");
2720
2721        write_json_safe(
2722            &result.process_evolution,
2723            &events_dir.join("process_evolution_events.json"),
2724            "Process evolution events",
2725        );
2726        write_json_safe(
2727            &result.organizational_events,
2728            &events_dir.join("organizational_events.json"),
2729            "Organizational events",
2730        );
2731        write_json_safe(
2732            &result.disruption_events,
2733            &events_dir.join("disruption_events.json"),
2734            "Disruption events",
2735        );
2736    }
2737
2738    // ========================================================================
2739    // ML Training: Counterfactual Pairs
2740    // ========================================================================
2741    if !result.counterfactual_pairs.is_empty() {
2742        let ml_dir = output_dir.join("ml_training");
2743        std::fs::create_dir_all(&ml_dir)?;
2744        info!("Writing ML training data...");
2745
2746        write_json_safe(
2747            &result.counterfactual_pairs,
2748            &ml_dir.join("counterfactual_pairs.json"),
2749            "Counterfactual pairs",
2750        );
2751    }
2752
2753    // ========================================================================
2754    // Fraud Red-Flag Indicators
2755    // ========================================================================
2756    if !result.red_flags.is_empty() {
2757        let labels_dir = output_dir.join("labels");
2758        std::fs::create_dir_all(&labels_dir)?;
2759        info!("Writing fraud red-flag indicators...");
2760
2761        write_json_safe(
2762            &result.red_flags,
2763            &labels_dir.join("fraud_red_flags.json"),
2764            "Fraud red flags",
2765        );
2766    }
2767
2768    // ========================================================================
2769    // Collusion Rings
2770    // ========================================================================
2771    if !result.collusion_rings.is_empty() {
2772        let labels_dir = output_dir.join("labels");
2773        std::fs::create_dir_all(&labels_dir)?;
2774        info!("Writing collusion rings...");
2775
2776        write_json_safe(
2777            &result.collusion_rings,
2778            &labels_dir.join("collusion_rings.json"),
2779            "Collusion rings",
2780        );
2781    }
2782
2783    // ========================================================================
2784    // Temporal Vendor Version Chains
2785    // ========================================================================
2786    if !result.temporal_vendor_chains.is_empty() {
2787        let temporal_dir = output_dir.join("temporal");
2788        std::fs::create_dir_all(&temporal_dir)?;
2789        info!("Writing temporal vendor version chains...");
2790
2791        write_json_safe(
2792            &result.temporal_vendor_chains,
2793            &temporal_dir.join("vendor_version_chains.json"),
2794            "Vendor version chains",
2795        );
2796    }
2797
2798    // ========================================================================
2799    // Entity Relationship Graph + Cross-Process Links
2800    // ========================================================================
2801    if result.entity_relationship_graph.is_some() || !result.cross_process_links.is_empty() {
2802        let rel_dir = output_dir.join("relationships");
2803        std::fs::create_dir_all(&rel_dir)?;
2804        info!("Writing entity relationship data...");
2805
2806        if let Some(ref graph) = result.entity_relationship_graph {
2807            match serde_json::to_string_pretty(graph) {
2808                Ok(json) => {
2809                    let path = rel_dir.join("entity_relationship_graph.json");
2810                    if let Err(e) = std::fs::write(&path, json) {
2811                        warn!("Failed to write entity relationship graph: {}", e);
2812                    } else {
2813                        info!(
2814                            "  Entity relationship graph written: {} nodes, {} edges -> {}",
2815                            graph.nodes.len(),
2816                            graph.edges.len(),
2817                            path.display()
2818                        );
2819                    }
2820                }
2821                Err(e) => warn!("Failed to serialize entity relationship graph: {}", e),
2822            }
2823        }
2824
2825        write_json_safe(
2826            &result.cross_process_links,
2827            &rel_dir.join("cross_process_links.json"),
2828            "Cross-process links",
2829        );
2830    }
2831
2832    // ========================================================================
2833    // Industry-Specific Data
2834    // ========================================================================
2835    if let Some(ref industry_output) = result.industry_output {
2836        if !industry_output.gl_accounts.is_empty() {
2837            let industry_dir = output_dir.join("industry");
2838            std::fs::create_dir_all(&industry_dir).ok();
2839            info!("Writing industry-specific data...");
2840            match serde_json::to_string_pretty(industry_output) {
2841                Ok(json) => {
2842                    if let Err(e) = std::fs::write(industry_dir.join("industry_data.json"), json) {
2843                        warn!("Failed to write industry data: {}", e);
2844                    } else {
2845                        info!(
2846                            "  Industry data written: {} GL accounts for {}",
2847                            industry_output.gl_accounts.len(),
2848                            industry_output.industry
2849                        );
2850                    }
2851                }
2852                Err(e) => warn!("Failed to serialize industry data: {}", e),
2853            }
2854        }
2855    }
2856
2857    // ========================================================================
2858    // Graph Export Summary
2859    // ========================================================================
2860    if result.graph_export.exported {
2861        let graph_dir = output_dir.join("graph_export");
2862        std::fs::create_dir_all(&graph_dir).ok();
2863        match serde_json::to_string_pretty(&result.graph_export) {
2864            Ok(json) => {
2865                if let Err(e) = std::fs::write(graph_dir.join("graph_export_summary.json"), json) {
2866                    warn!("Failed to write graph export summary: {}", e);
2867                } else {
2868                    info!("  Graph export summary written");
2869                }
2870            }
2871            Err(e) => warn!("Failed to serialize graph export summary: {}", e),
2872        }
2873    }
2874
2875    // ========================================================================
2876    // Compliance Regulations
2877    // ========================================================================
2878    let cr = &result.compliance_regulations;
2879    let has_compliance_data = !cr.standard_records.is_empty()
2880        || !cr.audit_procedures.is_empty()
2881        || !cr.findings.is_empty()
2882        || !cr.filings.is_empty();
2883    if has_compliance_data {
2884        let cr_dir = output_dir.join("compliance_regulations");
2885        std::fs::create_dir_all(&cr_dir)?;
2886        info!("Writing compliance regulations data...");
2887
2888        write_json_safe(
2889            &cr.standard_records,
2890            &cr_dir.join("compliance_standards.json"),
2891            "Compliance standards",
2892        );
2893        write_json_safe(
2894            &cr.cross_reference_records,
2895            &cr_dir.join("cross_references.json"),
2896            "Cross-references",
2897        );
2898        write_json_safe(
2899            &cr.jurisdiction_records,
2900            &cr_dir.join("jurisdiction_profiles.json"),
2901            "Jurisdiction profiles",
2902        );
2903        write_json_safe(
2904            &cr.audit_procedures,
2905            &cr_dir.join("audit_procedures.json"),
2906            "Audit procedures",
2907        );
2908        write_json_safe(
2909            &cr.findings,
2910            &cr_dir.join("compliance_findings.json"),
2911            "Compliance findings",
2912        );
2913        write_json_safe(
2914            &cr.filings,
2915            &cr_dir.join("regulatory_filings.json"),
2916            "Regulatory filings",
2917        );
2918
2919        if let Some(ref graph) = cr.compliance_graph {
2920            match serde_json::to_string_pretty(graph) {
2921                Ok(json) => {
2922                    if let Err(e) = std::fs::write(cr_dir.join("compliance_graph.json"), json) {
2923                        warn!("Failed to write compliance graph: {}", e);
2924                    } else {
2925                        info!(
2926                            "  Compliance graph written: {} nodes, {} edges",
2927                            graph.nodes.len(),
2928                            graph.edges.len()
2929                        );
2930                    }
2931                }
2932                Err(e) => warn!("Failed to serialize compliance graph: {}", e),
2933            }
2934        }
2935    }
2936
2937    // ========================================================================
2938    // Generation Statistics
2939    // ========================================================================
2940    match serde_json::to_string_pretty(&result.statistics) {
2941        Ok(json) => {
2942            if let Err(e) = std::fs::write(output_dir.join("generation_statistics.json"), json) {
2943                warn!("Failed to write generation statistics: {}", e);
2944            } else {
2945                info!("  Generation statistics written");
2946            }
2947        }
2948        Err(e) => warn!("Failed to serialize generation statistics: {}", e),
2949    }
2950
2951    info!("Output writing complete.");
2952    Ok(())
2953}
2954
2955/// Write JSON with error handling - logs a warning on failure but does not abort.
2956///
2957/// When the `FLAT_LAYOUT_ACTIVE` thread-local is true (set by
2958/// `write_all_output_with_layout` when `export_layout: flat`), this routes
2959/// through `write_json_flat` so nested `{header, lines|items|allocations}`
2960/// shapes are automatically flattened. For structures without that shape,
2961/// `write_json_flat` passes through unchanged.
2962fn write_json_safe<T: serde::Serialize>(data: &[T], path: &Path, label: &str) {
2963    // Skip JSON entirely when not in requested output formats
2964    if SKIP_JSON.with(|c| c.get()) {
2965        return;
2966    }
2967    if FLAT_LAYOUT_ACTIVE.with(|c| c.get()) {
2968        write_json_flat(data, path, label);
2969    } else if let Err(e) = write_json(data, path, label) {
2970        warn!("Failed to write {}: {}", label, e);
2971    }
2972}
2973
2974/// Write JSON, choosing flat or nested layout based on the flag.
2975fn write_json_auto<T: serde::Serialize>(data: &[T], path: &Path, label: &str, flat: bool) {
2976    if flat {
2977        write_json_flat(data, path, label);
2978    } else {
2979        write_json_safe(data, path, label);
2980    }
2981}
2982
2983/// Write a JSON file ALWAYS, even when the slice is empty (writes `[]`).
2984///
2985/// Use for files that must exist in the archive for SDK consumers
2986/// (e.g., `audit_opinions.json`) regardless of whether the phase that
2987/// populates them ran. `write_json_safe` / `write_json` short-circuit
2988/// on empty slices, which would break manifest-driven clients that
2989/// expect the file to be present.
2990fn write_json_always<T: serde::Serialize>(data: &[T], path: &Path, label: &str) {
2991    if SKIP_JSON.with(|c| c.get()) {
2992        return;
2993    }
2994    match std::fs::File::create(path) {
2995        Ok(file) => {
2996            let mut writer = std::io::BufWriter::with_capacity(64 * 1024, file);
2997            if let Err(e) = (|| -> Result<(), Box<dyn std::error::Error>> {
2998                writer.write_all(b"[\n")?;
2999                for (i, item) in data.iter().enumerate() {
3000                    if i > 0 {
3001                        writer.write_all(b",\n")?;
3002                    }
3003                    serde_json::to_writer_pretty(&mut writer, item)?;
3004                }
3005                if !data.is_empty() {
3006                    writer.write_all(b"\n")?;
3007                }
3008                writer.write_all(b"]\n")?;
3009                writer.flush()?;
3010                Ok(())
3011            })() {
3012                warn!("Failed to write {}: {}", label, e);
3013            } else {
3014                info!(
3015                    "  {} written: {} records -> {}",
3016                    label,
3017                    data.len(),
3018                    path.display()
3019                );
3020            }
3021        }
3022        Err(e) => {
3023            warn!("Failed to create {}: {}", path.display(), e);
3024        }
3025    }
3026}
3027
3028/// Write a flat JSON file by expanding a primary items array and merging the
3029/// surrounding context onto each line.
3030///
3031/// Flattens any record that contains a recognised items array
3032/// (`items`, `lines`, `line_items`, or `allocations`) into one row per line,
3033/// carrying over both the optional `header` sub-object and all other
3034/// top-level fields. Records without a recognised items array are emitted
3035/// as-is, except that an optional nested `header` sub-object is unwrapped
3036/// onto the top level so consumers see a uniformly flat shape.
3037///
3038/// Flow-style documents (`{header, items}`) and subledger-style documents
3039/// (`{..top-level scalars.., lines}`, e.g. AP/AR invoices, inventory
3040/// valuation runs) are both handled — fixing the SDK-team-reported gap
3041/// where subledger invoices were left with `lines` nested in flat mode.
3042///
3043/// Uses heap-allocated intermediates to avoid stack overflow with large
3044/// records in constrained environments (e.g., distroless containers with
3045/// glibc 2.36). Fixes #116.
3046fn write_json_flat<T: serde::Serialize>(data: &[T], path: &Path, label: &str) {
3047    if data.is_empty() {
3048        return;
3049    }
3050
3051    // Pre-allocate on heap — avoid flat_map closure accumulating on the stack
3052    let mut flat: Vec<serde_json::Value> = Vec::with_capacity(data.len());
3053
3054    for item in data {
3055        let val = match serde_json::to_value(item) {
3056            Ok(v) => v,
3057            Err(e) => {
3058                warn!("Failed to serialize record for flat export: {}", e);
3059                continue;
3060            }
3061        };
3062
3063        let serde_json::Value::Object(map) = val else {
3064            flat.push(val);
3065            continue;
3066        };
3067
3068        // Find the primary items array key (first match wins).
3069        let items_key = ["items", "lines", "allocations", "line_items"]
3070            .iter()
3071            .find(|k| map.contains_key(**k))
3072            .copied();
3073
3074        // Optional nested header sub-object (used by document flows).
3075        let header_map = match map.get("header") {
3076            Some(serde_json::Value::Object(h)) => Some(h),
3077            _ => None,
3078        };
3079
3080        let Some(items_key) = items_key else {
3081            // No items array. Emit one row, unwrapping the optional header
3082            // sub-object so consumers see a flat shape regardless of model
3083            // layout (e.g. Payments have `header` but no items/allocations
3084            // when allocations are empty).
3085            if let Some(header_map) = header_map {
3086                let mut merged = map.clone();
3087                merged.remove("header");
3088                for (k, v) in header_map {
3089                    merged.entry(k.clone()).or_insert_with(|| v.clone());
3090                }
3091                flat.push(serde_json::Value::Object(merged));
3092            } else {
3093                flat.push(serde_json::Value::Object(map));
3094            }
3095            continue;
3096        };
3097
3098        let Some(serde_json::Value::Array(items)) = map.get(items_key) else {
3099            // `items_key` present but not an array — passthrough.
3100            flat.push(serde_json::Value::Object(map));
3101            continue;
3102        };
3103
3104        // Empty items array: emit one row with the (unwrapped) header
3105        // context so downstream consumers can still find the parent
3106        // record — prevents silently dropping empty-lines invoices.
3107        if items.is_empty() {
3108            let mut merged = map.clone();
3109            merged.remove(items_key);
3110            if let Some(header_map) = header_map {
3111                merged.remove("header");
3112                for (k, v) in header_map {
3113                    merged.entry(k.clone()).or_insert_with(|| v.clone());
3114                }
3115            }
3116            flat.push(serde_json::Value::Object(merged));
3117            continue;
3118        }
3119
3120        // Collect all other top-level fields (scalars, objects, arrays)
3121        // so they carry over onto every flattened line — matching pandas
3122        // `explode()` semantics. This is the behaviour SDK consumers
3123        // expect: header context is repeated per line, nested objects
3124        // like `net_amount: {amount, currency}` come along for the ride.
3125        let top_fields: Vec<(&String, &serde_json::Value)> = map
3126            .iter()
3127            .filter(|(k, _)| k.as_str() != "header" && k.as_str() != items_key)
3128            .collect();
3129
3130        flat.reserve(items.len());
3131        for item_val in items {
3132            let mut merged = serde_json::Map::new();
3133            // Line/item fields first (take precedence on collisions).
3134            if let serde_json::Value::Object(m) = item_val {
3135                merged.extend(m.iter().map(|(k, v)| (k.clone(), v.clone())));
3136            }
3137            // Header sub-object (when present) — don't overwrite line fields.
3138            if let Some(header_map) = header_map {
3139                for (k, v) in header_map {
3140                    merged.entry(k.clone()).or_insert_with(|| v.clone());
3141                }
3142            }
3143            // All other top-level fields.
3144            for &(k, v) in &top_fields {
3145                merged.entry(k.clone()).or_insert_with(|| v.clone());
3146            }
3147            flat.push(serde_json::Value::Object(merged));
3148        }
3149    }
3150
3151    if flat.is_empty() {
3152        return;
3153    }
3154
3155    // Stream-write each flattened record instead of serializing the whole Vec
3156    let count = flat.len();
3157    match std::fs::File::create(path) {
3158        Ok(file) => {
3159            use std::io::Write;
3160            let mut writer = std::io::BufWriter::with_capacity(512 * 1024, file);
3161            if let Err(e) = (|| -> Result<(), Box<dyn std::error::Error>> {
3162                writer.write_all(b"[\n")?;
3163                for (i, item) in flat.iter().enumerate() {
3164                    if i > 0 {
3165                        writer.write_all(b",\n")?;
3166                    }
3167                    serde_json::to_writer_pretty(&mut writer, item)?;
3168                }
3169                writer.write_all(b"\n]\n")?;
3170                writer.flush()?;
3171                Ok(())
3172            })() {
3173                warn!("Failed to write {}: {}", label, e);
3174            } else {
3175                info!(
3176                    "  {} written (flat): {} records -> {}",
3177                    label,
3178                    count,
3179                    path.display()
3180                );
3181            }
3182        }
3183        Err(e) => warn!("Failed to create {}: {}", label, e),
3184    }
3185}
3186
3187/// Write a single serializable value as a JSON file.
3188fn write_json_single<T: serde::Serialize>(
3189    data: &T,
3190    path: &Path,
3191    label: &str,
3192) -> Result<(), Box<dyn std::error::Error>> {
3193    let file = std::fs::File::create(path)?;
3194    let writer = std::io::BufWriter::with_capacity(256 * 1024, file);
3195    serde_json::to_writer_pretty(writer, data)?;
3196    info!("  {} written -> {}", label, path.display());
3197    Ok(())
3198}
3199
3200/// Write a single serializable value as a JSON file, logging a warning on failure.
3201fn write_json_single_safe<T: serde::Serialize>(data: &T, path: &Path, label: &str) {
3202    if SKIP_JSON.with(|c| c.get()) {
3203        return;
3204    }
3205    if let Err(e) = write_json_single(data, path, label) {
3206        warn!("Failed to write {}: {}", label, e);
3207    }
3208}
3209
3210/// Serializable summary of balance validation (avoids serializing the full
3211/// `BalanceValidationResult` which has non-Serialize validation error types).
3212#[derive(serde::Serialize)]
3213struct BalanceValidationSummary {
3214    validated: bool,
3215    is_balanced: bool,
3216    entries_processed: u64,
3217    total_debits: String,
3218    total_credits: String,
3219    accounts_tracked: usize,
3220    companies_tracked: usize,
3221    has_unbalanced_entries: bool,
3222    validation_error_count: usize,
3223}
3224
3225impl BalanceValidationSummary {
3226    fn from(v: &crate::enhanced_orchestrator::BalanceValidationResult) -> Self {
3227        Self {
3228            validated: v.validated,
3229            is_balanced: v.is_balanced,
3230            entries_processed: v.entries_processed,
3231            total_debits: v.total_debits.to_string(),
3232            total_credits: v.total_credits.to_string(),
3233            accounts_tracked: v.accounts_tracked,
3234            companies_tracked: v.companies_tracked,
3235            has_unbalanced_entries: v.has_unbalanced_entries,
3236            validation_error_count: v.validation_errors.len(),
3237        }
3238    }
3239}