Skip to main content

datasynth_runtime/
output_writer.rs

1//! Comprehensive output writer for all generated data.
2//!
3//! Writes all generated data from the EnhancedGenerationResult to files
4//! in the output directory. Uses CSV for flat tabular data (journal entry
5//! lines) and JSON for types with nested structures (Vecs, sub-structs).
6
7use std::cell::Cell;
8use std::io::Write;
9use std::path::Path;
10
11use crate::enhanced_orchestrator::EnhancedGenerationResult;
12use datasynth_core::documents::PaymentType;
13use datasynth_output::OutputRootConfig;
14use tracing::{info, warn};
15
thread_local! {
    /// Per-thread "skip JSON" switch, initially `false`.
    ///
    /// `write_all_output_with_layout` sets it to `true` when the requested
    /// formats enable no JSON output and a drop guard puts it back to `false`
    /// when that function returns. `write_json_safe` (defined outside this
    /// excerpt) is expected to turn into a no-op while the flag is set, which
    /// spares every call site an explicit `if json_enabled` check.
    static SKIP_JSON: Cell<bool> = const { Cell::new(false) };

    /// Per-thread "flat layout" switch, initially `false`.
    ///
    /// `write_all_output_with_layout` sets it to `true` for
    /// `ExportLayout::Flat` and a drop guard resets it to `false` on return.
    /// `write_json_safe` is expected to route through `write_json_flat` while
    /// the flag is set so nested `{header, lines}` shapes get flattened.
    ///
    /// Being thread-local, the value is NOT visible on threads spawned by the
    /// writer; the journal-entry worker threads receive the layout explicitly
    /// instead of reading this flag.
    static FLAT_LAYOUT_ACTIVE: Cell<bool> = const { Cell::new(false) };
}
28
29/// Write a JSON file for any serializable slice. Skips empty slices.
30///
31/// Streams JSON directly to a buffered file writer instead of allocating
32/// the entire JSON string in memory (Phase 3 I/O optimization).
33/// Write a JSON array by streaming one record at a time.
34///
35/// Instead of serializing the entire `&[T]` in one `to_writer_pretty` call
36/// (which builds a massive in-memory serde state for large arrays), this
37/// writes `[\n` + per-record pretty-printed JSON with commas + `\n]`.
38///
39/// For 200K+ records this reduces peak memory and improves write throughput
40/// by avoiding serde's internal buffering of the full array structure.
41fn write_json<T: serde::Serialize>(
42    data: &[T],
43    path: &Path,
44    label: &str,
45) -> Result<(), Box<dyn std::error::Error>> {
46    use std::io::Write;
47
48    if data.is_empty() {
49        return Ok(());
50    }
51
52    let file = std::fs::File::create(path)?;
53    let mut writer = std::io::BufWriter::with_capacity(512 * 1024, file);
54
55    // Stream records one at a time into a JSON array
56    writer.write_all(b"[\n")?;
57    for (i, item) in data.iter().enumerate() {
58        if i > 0 {
59            writer.write_all(b",\n")?;
60        }
61        serde_json::to_writer_pretty(&mut writer, item)?;
62    }
63    writer.write_all(b"\n]\n")?;
64    writer.flush()?;
65
66    info!(
67        "  {} written: {} records -> {}",
68        label,
69        data.len(),
70        path.display()
71    );
72    Ok(())
73}
74
75/// Write journal entry lines as a flat CSV file.
76///
77/// This extracts the key fields from both the header and each line item to
78/// produce a single flat CSV that can be loaded directly into dataframes.
79fn write_journal_entries_csv(
80    result: &EnhancedGenerationResult,
81    output_dir: &Path,
82) -> Result<(), Box<dyn std::error::Error>> {
83    if result.journal_entries.is_empty() {
84        return Ok(());
85    }
86
87    let path = output_dir.join("journal_entries.csv");
88    let file = std::fs::File::create(&path)?;
89    let mut w = std::io::BufWriter::with_capacity(256 * 1024, file);
90
91    // Write header.
92    //
93    // Schema note: each release that widens the schema appends new
94    // columns at the end so existing column-positional consumers keep
95    // working.
96    //   v5.5.1 added:
97    //     is_manual, is_post_close, source_system     (audit / ETL provenance)
98    //     account_description                         (joined from CoA)
99    //     financial_statement_category                (asset/liability/...)
100    //     assignment, value_date, tax_code            (already-populated line fields)
101    //     transaction_id                              (stable per-line id)
102    //   v5.6.0 added (ISO 21378 Audit Data Collection classification):
103    //     account_class, account_class_name           (Level-2 e.g. "A.B" / "Trade Receivables")
104    //     account_sub_class, account_sub_class_name   (Level-3 e.g. "A.B.A" / "Trade Accounts Receivable")
105    writeln!(
106        w,
107        "document_id,company_code,fiscal_year,fiscal_period,posting_date,document_date,\
108         document_type,currency,exchange_rate,reference,header_text,created_by,source,\
109         business_process,ledger,is_fraud,is_anomaly,\
110         line_number,gl_account,debit_amount,credit_amount,local_amount,\
111         cost_center,profit_center,line_text,\
112         auxiliary_account_number,auxiliary_account_label,lettrage,lettrage_date,\
113         is_manual,is_post_close,source_system,\
114         account_description,financial_statement_category,\
115         assignment,value_date,tax_code,transaction_id,\
116         account_class,account_class_name,account_sub_class,account_sub_class_name"
117    )?;
118
119    // Build a CoA → (short_description, ISO class, ISO sub-class) lookup.
120    // Empty when no CoA was generated (e.g. some smoke tests); resolution
121    // falls back to the line's already-populated `account_description`
122    // and to empty ISO codes.
123    let coa_index: std::collections::HashMap<&str, (&str, &str, &str, &str, &str)> = result
124        .chart_of_accounts
125        .accounts
126        .iter()
127        .map(|a| {
128            (
129                a.account_number.as_str(),
130                (
131                    a.short_description.as_str(),
132                    a.account_class.as_str(),
133                    a.account_class_name.as_str(),
134                    a.account_sub_class.as_str(),
135                    a.account_sub_class_name.as_str(),
136                ),
137            )
138        })
139        .collect();
140
141    for je in &result.journal_entries {
142        let h = &je.header;
143        for line in &je.lines {
144            let lettrage_date_str = line
145                .lettrage_date
146                .map(|d| d.to_string())
147                .unwrap_or_default();
148            let value_date_str = line.value_date.map(|d| d.to_string()).unwrap_or_default();
149            // Look up CoA-joined fields in one shot.
150            let coa_hit = coa_index.get(line.gl_account.as_str()).copied();
151            let coa_short_desc = coa_hit.map(|t| t.0).unwrap_or("");
152            let coa_class = coa_hit.map(|t| t.1).unwrap_or("");
153            let coa_class_name = coa_hit.map(|t| t.2).unwrap_or("");
154            let coa_sub_class = coa_hit.map(|t| t.3).unwrap_or("");
155            let coa_sub_class_name = coa_hit.map(|t| t.4).unwrap_or("");
156            // Prefer the line's own account_description; fall back to the CoA
157            // lookup so consumers always get a name even when the generator
158            // forgot to populate the field.
159            let account_description: &str = line
160                .account_description
161                .as_deref()
162                .filter(|s| !s.is_empty())
163                .unwrap_or(coa_short_desc);
164            // Derive the FSA category from the gl_account prefix (1xxx=asset,
165            // 2xxx=liability, ...). Cheap, deterministic, no CoA dependency.
166            let fsa_category =
167                datasynth_core::accounts::AccountCategory::from_account(line.gl_account.as_str())
168                    .as_label();
169            // Stable per-line identifier (UUID v5 of document_id+line_number).
170            let transaction_id = line.transaction_id.clone().unwrap_or_else(|| {
171                datasynth_core::models::JournalEntryLine::derive_transaction_id(
172                    line.document_id,
173                    line.line_number,
174                )
175            });
176            writeln!(
177                w,
178                "{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}",
179                h.document_id,
180                csv_escape(&h.company_code),
181                h.fiscal_year,
182                h.fiscal_period,
183                h.posting_date,
184                h.document_date,
185                csv_escape(&h.document_type),
186                csv_escape(&h.currency),
187                h.exchange_rate,
188                csv_opt_str(&h.reference),
189                csv_opt_str(&h.header_text),
190                csv_escape(&h.created_by),
191                h.source,
192                h.business_process
193                    .map(|bp| format!("{bp:?}"))
194                    .unwrap_or_default(),
195                csv_escape(&h.ledger),
196                h.is_fraud,
197                h.is_anomaly,
198                line.line_number,
199                csv_escape(&line.gl_account),
200                line.debit_amount,
201                line.credit_amount,
202                line.local_amount,
203                csv_opt_str(&line.cost_center),
204                csv_opt_str(&line.profit_center),
205                csv_opt_str(&line.line_text),
206                csv_opt_str(&line.auxiliary_account_number),
207                csv_opt_str(&line.auxiliary_account_label),
208                csv_opt_str(&line.lettrage),
209                lettrage_date_str,
210                h.is_manual,
211                h.is_post_close,
212                csv_escape(&h.source_system),
213                csv_escape(account_description),
214                fsa_category,
215                csv_opt_str(&line.assignment),
216                value_date_str,
217                csv_opt_str(&line.tax_code),
218                csv_escape(&transaction_id),
219                csv_escape(coa_class),
220                csv_escape(coa_class_name),
221                csv_escape(coa_sub_class),
222                csv_escape(coa_sub_class_name),
223            )?;
224        }
225    }
226
227    w.flush()?;
228    let total_lines: usize = result.journal_entries.iter().map(|je| je.lines.len()).sum();
229    info!(
230        "  Journal entries CSV written: {} entries, {} line items -> {}",
231        result.journal_entries.len(),
232        total_lines,
233        path.display()
234    );
235    Ok(())
236}
237
238/// **v5.8.0** — write `graphs/je_network.csv`: a flat edge-list of the
239/// accounting network derived from journal entries.
240///
241/// Each row represents one debit↔credit flow within a single JE,
242/// formed via the cartesian product of debit lines × credit lines (the
243/// approach in `datasynth-graph::TransactionGraphBuilder`). For a
244/// 2-line JE this is exactly the bijective Method-A flow from
245/// Ivertowski et al. (2024); for larger JEs it is a Method-B/C
246/// approximation with proportional amount allocation.
247///
248/// Joins back to `journal_entries.csv` via:
249///   - `document_id` → JE-level header
250///   - `from_line_id` / `to_line_id` → per-line `transaction_id`
251///   - `predecessor_edge_id` → previous flow in a document chain
252fn write_je_network_csv(
253    result: &EnhancedGenerationResult,
254    output_dir: &Path,
255) -> Result<(), Box<dyn std::error::Error>> {
256    use rust_decimal::Decimal;
257
258    if result.journal_entries.is_empty() {
259        return Ok(());
260    }
261    let graphs_dir = output_dir.join("graphs");
262    std::fs::create_dir_all(&graphs_dir)?;
263    let path = graphs_dir.join("je_network.csv");
264    let file = std::fs::File::create(&path)?;
265    let mut w = std::io::BufWriter::with_capacity(256 * 1024, file);
266
267    writeln!(
268        w,
269        "edge_id,document_id,posting_date,from_account,to_account,\
270         from_line_id,to_line_id,amount,confidence,\
271         predecessor_edge_id,business_process,is_fraud,is_anomaly"
272    )?;
273
274    // Build a map line_transaction_id → edge_id of the FIRST out-going
275    // edge from that line so predecessor_edge_id can be resolved without
276    // a second pass: if a curr-line's predecessor_line_id points at
277    // some prev-line's transaction_id, the predecessor edge is the
278    // first edge in that prev JE that has this line as its credit
279    // ("from") side. This reasonably maps each line to one canonical
280    // outgoing edge.
281    //
282    // We populate the map during the first pass (write the rows) and
283    // resolve predecessor_edge_id with the existing entry — JEs are
284    // emitted in chain order, so the predecessor will already be in
285    // the map by the time we look it up.
286    let mut line_id_to_edge_id: std::collections::HashMap<String, String> =
287        std::collections::HashMap::with_capacity(result.journal_entries.len() * 2);
288
289    let mut total_edges: usize = 0;
290
291    for je in &result.journal_entries {
292        let h = &je.header;
293        // Pre-compute transaction_ids for every line (so we can pair
294        // them without re-deriving inside the inner loop).
295        let line_ids: Vec<String> = je
296            .lines
297            .iter()
298            .map(|l| {
299                l.transaction_id.clone().unwrap_or_else(|| {
300                    datasynth_core::models::JournalEntryLine::derive_transaction_id(
301                        l.document_id,
302                        l.line_number,
303                    )
304                })
305            })
306            .collect();
307
308        let debits: Vec<usize> = je
309            .lines
310            .iter()
311            .enumerate()
312            .filter(|(_, l)| l.debit_amount > Decimal::ZERO)
313            .map(|(i, _)| i)
314            .collect();
315        let credits: Vec<usize> = je
316            .lines
317            .iter()
318            .enumerate()
319            .filter(|(_, l)| l.credit_amount > Decimal::ZERO)
320            .map(|(i, _)| i)
321            .collect();
322        if debits.is_empty() || credits.is_empty() {
323            continue;
324        }
325
326        let total_debit: Decimal = debits.iter().map(|i| je.lines[*i].debit_amount).sum();
327        let total_credit: Decimal = credits.iter().map(|i| je.lines[*i].credit_amount).sum();
328        if total_debit.is_zero() || total_credit.is_zero() {
329            continue;
330        }
331
332        // Confidence per the paper: bijective on 2-line entries
333        // (Method A), 1/(n*m) approximation otherwise.
334        let confidence: f64 = if debits.len() == 1 && credits.len() == 1 {
335            1.0
336        } else {
337            1.0 / (debits.len() * credits.len()) as f64
338        };
339
340        let bp = h
341            .business_process
342            .map(|bp| format!("{bp:?}"))
343            .unwrap_or_default();
344        let posting_date = h.posting_date.to_string();
345        let doc_id = h.document_id.to_string();
346
347        for &di in &debits {
348            let debit_line = &je.lines[di];
349            let to_line_id = &line_ids[di];
350            for &ci in &credits {
351                let credit_line = &je.lines[ci];
352                let from_line_id = &line_ids[ci];
353
354                // Edge id = UUID v5 of (document_id, debit.line_number,
355                // credit.line_number). Stable across regenerations.
356                let mut input = Vec::with_capacity(16 + 8);
357                input.extend_from_slice(h.document_id.as_bytes());
358                input.extend_from_slice(&debit_line.line_number.to_le_bytes());
359                input.extend_from_slice(&credit_line.line_number.to_le_bytes());
360                let edge_id = uuid::Uuid::new_v5(&uuid::Uuid::NAMESPACE_OID, &input).to_string();
361
362                // Proportional allocation of the flow's amount (matches
363                // the existing TransactionGraphBuilder::add_journal_entry
364                // _debit_credit formula).
365                let proportion = (debit_line.debit_amount / total_debit)
366                    * (credit_line.credit_amount / total_credit);
367                let amount = debit_line.debit_amount * proportion;
368
369                // Resolve predecessor: the predecessor_line_id from the
370                // CREDIT side (it represents the "from" of the next
371                // edge); fall back to the debit side. Either points at
372                // a transaction_id whose first outgoing edge id is the
373                // predecessor edge.
374                let predecessor_edge_id: String = credit_line
375                    .predecessor_line_id
376                    .as_ref()
377                    .or(debit_line.predecessor_line_id.as_ref())
378                    .and_then(|tx_id| line_id_to_edge_id.get(tx_id).cloned())
379                    .unwrap_or_default();
380
381                writeln!(
382                    w,
383                    "{},{},{},{},{},{},{},{},{},{},{},{},{}",
384                    csv_escape(&edge_id),
385                    csv_escape(&doc_id),
386                    csv_escape(&posting_date),
387                    csv_escape(&credit_line.gl_account),
388                    csv_escape(&debit_line.gl_account),
389                    csv_escape(from_line_id),
390                    csv_escape(to_line_id),
391                    amount,
392                    confidence,
393                    csv_escape(&predecessor_edge_id),
394                    csv_escape(&bp),
395                    h.is_fraud,
396                    h.is_anomaly,
397                )?;
398
399                // Map the credit line ("from" end) to its first edge —
400                // this is the canonical outgoing edge subsequent JEs
401                // can refer to as their predecessor.
402                line_id_to_edge_id
403                    .entry(from_line_id.clone())
404                    .or_insert_with(|| edge_id.clone());
405                total_edges += 1;
406            }
407        }
408    }
409
410    w.flush()?;
411    info!(
412        "  JE network CSV written: {} edges from {} entries -> {}",
413        total_edges,
414        result.journal_entries.len(),
415        path.display()
416    );
417    Ok(())
418}
419
420/// Write journal entries as flat JSON (header fields merged onto each line).
421///
422/// Each object in the output array contains all header fields plus all line fields,
423/// with no nesting. This is the analytics-friendly format.
424fn write_journal_entries_flat_json(
425    result: &EnhancedGenerationResult,
426    output_dir: &Path,
427) -> Result<(), Box<dyn std::error::Error>> {
428    if result.journal_entries.is_empty() {
429        return Ok(());
430    }
431
432    let path = output_dir.join("journal_entries.json");
433    let file = std::fs::File::create(&path)?;
434    let mut writer = std::io::BufWriter::with_capacity(256 * 1024, file);
435
436    // Write opening bracket
437    writer.write_all(b"[\n")?;
438
439    let mut first = true;
440    let mut total_lines = 0usize;
441    for je in &result.journal_entries {
442        // Serialize header to a JSON map
443        let header_value = serde_json::to_value(&je.header)?;
444
445        for line in &je.lines {
446            if !first {
447                writer.write_all(b",\n")?;
448            }
449            first = false;
450            total_lines += 1;
451
452            // Serialize line to a JSON map, then merge header fields in
453            let mut line_value = serde_json::to_value(line)?;
454
455            if let serde_json::Value::Object(ref header_map) = header_value {
456                if let serde_json::Value::Object(ref mut line_map) = line_value {
457                    for (key, val) in header_map {
458                        // Line fields take precedence for shared keys (e.g. document_id)
459                        if !line_map.contains_key(key) {
460                            line_map.insert(key.clone(), val.clone());
461                        }
462                    }
463                }
464            }
465
466            serde_json::to_writer_pretty(&mut writer, &line_value)?;
467        }
468    }
469
470    writer.write_all(b"\n]\n")?;
471    writer.flush()?;
472    info!(
473        "  Journal entries (flat JSON) written: {} line items -> {}",
474        total_lines,
475        path.display()
476    );
477    Ok(())
478}
479
480/// v4.4.2 helper — walk a serialized OCEL event-log `Value` tree and
481/// mirror `object_type_id` into `object_type` on every
482/// `object_refs[*]` entry. The canonical OCEL 2.0 field name is
483/// `object_type`; DataSynth's internal model carries it as
484/// `object_type_id` for historical reasons. Emitting both keys lets
485/// OCEL-spec-compliant consumers (pm4py, Celonis, etc.) see the type
486/// without a rename step.
487fn add_ocel_object_type_alias(value: &mut serde_json::Value) {
488    if let Some(events) = value.get_mut("events").and_then(|v| v.as_array_mut()) {
489        for event in events.iter_mut() {
490            if let Some(refs) = event.get_mut("object_refs").and_then(|r| r.as_array_mut()) {
491                for oref in refs.iter_mut() {
492                    if let Some(obj) = oref.as_object_mut() {
493                        if let Some(oti) = obj.get("object_type_id").cloned() {
494                            obj.entry("object_type").or_insert(oti);
495                        }
496                    }
497                }
498            }
499        }
500    }
501}
502
/// Escape a string for use as a single CSV field.
///
/// The value is wrapped in double quotes, with every embedded `"` doubled,
/// when it contains a comma, a double quote, a line feed or a carriage
/// return. Any other value is returned unchanged (as an owned copy).
///
/// Carriage returns are included because a bare `\r` inside an unquoted field
/// is read as a record break by CSV parsers; RFC 4180 requires fields
/// containing line breaks to be quoted.
fn csv_escape(s: &str) -> String {
    if s.contains([',', '"', '\n', '\r']) {
        format!("\"{}\"", s.replace('"', "\"\""))
    } else {
        s.to_string()
    }
}
511
512/// Format an Option<String> for CSV output (empty string for None).
513fn csv_opt_str(opt: &Option<String>) -> String {
514    match opt {
515        Some(s) => csv_escape(s),
516        None => String::new(),
517    }
518}
519
520/// Write all generated data to the output directory.
521///
522/// This function exports every non-empty dataset from the generation result.
523/// Journal entries are written as a flat CSV file (one row per line item)
524/// and as a nested JSON file. Other data is written as JSON files since
525/// many model types contain nested structures.
526#[allow(dead_code)]
527pub fn write_all_output(
528    result: &EnhancedGenerationResult,
529    output_dir: &Path,
530) -> Result<(), Box<dyn std::error::Error>> {
531    write_all_output_with_layout(
532        result,
533        output_dir,
534        datasynth_config::ExportLayout::Nested,
535        &[
536            datasynth_config::FileFormat::Csv,
537            datasynth_config::FileFormat::Json,
538        ],
539    )
540}
541
542/// Variant of [`write_all_output_with_layout`] that routes output through
543/// an [`OutputRootConfig`] instead of a raw `&Path`.
544///
545/// Per-entity subtree mode is used by the group-audit shard runner
546/// (v5.0+): the runner sets `per_entity_subtree: true` and
547/// `entity_code: Some(code)` on `root`, and this helper drops each
548/// entity's archive under `{root_dir}/entities/{code}/` so group-wide
549/// artifacts can still live at `{root_dir}/`.
550///
551/// In flat mode (the default for single-entity runs) this is exactly
552/// equivalent to calling [`write_all_output_with_layout`] with
553/// `output_dir = root.root_dir`, so the signature and behavior of the
554/// existing single-entity entrypoints are unchanged.
555#[allow(dead_code)]
556pub fn write_all_output_with_root(
557    result: &EnhancedGenerationResult,
558    root: &OutputRootConfig,
559    export_layout: datasynth_config::ExportLayout,
560    formats: &[datasynth_config::FileFormat],
561) -> Result<(), Box<dyn std::error::Error>> {
562    let effective = root.effective_dir();
563    write_all_output_with_layout(result, &effective, export_layout, formats)
564}
565
566/// Write all generated data with a configurable export layout and format set.
567///
568/// Only writes files for formats present in `formats`. If `formats` is empty,
569/// writes both CSV and JSON (backward compatible). This allows skipping JSON
570/// when only CSV is needed, which halves output time for large datasets.
571pub fn write_all_output_with_layout(
572    result: &EnhancedGenerationResult,
573    output_dir: &Path,
574    export_layout: datasynth_config::ExportLayout,
575    formats: &[datasynth_config::FileFormat],
576) -> Result<(), Box<dyn std::error::Error>> {
577    let csv_enabled = formats.is_empty()
578        || formats.contains(&datasynth_config::FileFormat::Csv)
579        || formats.contains(&datasynth_config::FileFormat::Parquet);
580    let json_enabled = formats.is_empty()
581        || formats.contains(&datasynth_config::FileFormat::Json)
582        || formats.contains(&datasynth_config::FileFormat::JsonLines);
583    std::fs::create_dir_all(output_dir)?;
584    info!("Writing comprehensive output to: {}", output_dir.display());
585
586    // Set flat-layout flag for all `write_json_safe` calls in this pass.
587    // Scope guard ensures we reset on return (including error paths).
588    struct FlatLayoutGuard;
589    impl Drop for FlatLayoutGuard {
590        fn drop(&mut self) {
591            FLAT_LAYOUT_ACTIVE.with(|c| c.set(false));
592        }
593    }
594    let _flat_guard = if export_layout == datasynth_config::ExportLayout::Flat {
595        FLAT_LAYOUT_ACTIVE.with(|c| c.set(true));
596        Some(FlatLayoutGuard)
597    } else {
598        None
599    };
600
601    // Set JSON skip flag so `write_json_safe` becomes a no-op when JSON not requested.
602    struct SkipJsonGuard;
603    impl Drop for SkipJsonGuard {
604        fn drop(&mut self) {
605            SKIP_JSON.with(|c| c.set(false));
606        }
607    }
608    let _skip_json_guard = if !json_enabled {
609        SKIP_JSON.with(|c| c.set(true));
610        info!("JSON output skipped (not in requested formats)");
611        Some(SkipJsonGuard)
612    } else {
613        None
614    };
615
616    // ========================================================================
617    // Journal Entries (CSV + JSON in parallel when both enabled)
618    // ========================================================================
619    if !result.journal_entries.is_empty() {
620        let do_csv = csv_enabled;
621        let do_json = json_enabled;
622        let is_flat = export_layout == datasynth_config::ExportLayout::Flat;
623
624        std::thread::scope(|s| {
625            if do_csv {
626                s.spawn(|| {
627                    if let Err(e) = write_journal_entries_csv(result, output_dir) {
628                        warn!("Failed to write journal_entries.csv: {}", e);
629                    }
630                });
631                // v5.8.0 — flat edge-list for accounting-network construction.
632                // Always emit when CSV is requested; cheap relative to the
633                // main JE table.
634                s.spawn(|| {
635                    if let Err(e) = write_je_network_csv(result, output_dir) {
636                        warn!("Failed to write graphs/je_network.csv: {}", e);
637                    }
638                });
639            }
640            if do_json {
641                s.spawn(|| {
642                    if is_flat {
643                        if let Err(e) = write_journal_entries_flat_json(result, output_dir) {
644                            warn!("Failed to write flat journal_entries.json: {}", e);
645                        }
646                    } else if let Err(e) = write_json(
647                        &result.journal_entries,
648                        &output_dir.join("journal_entries.json"),
649                        "Journal entries (JSON)",
650                    ) {
651                        warn!("Failed to write journal_entries.json: {}", e);
652                    }
653                });
654            }
655        });
656    }
657
658    // ========================================================================
659    // Master Data
660    // ========================================================================
661    let md_dir = output_dir.join("master_data");
662    if !result.master_data.vendors.is_empty()
663        || !result.master_data.customers.is_empty()
664        || !result.master_data.materials.is_empty()
665        || !result.master_data.assets.is_empty()
666        || !result.master_data.employees.is_empty()
667        || !result.master_data.cost_centers.is_empty()
668        || !result.master_data.profit_centers.is_empty()
669    {
670        std::fs::create_dir_all(&md_dir)?;
671        info!("Writing master data...");
672
673        write_json_safe(
674            &result.master_data.vendors,
675            &md_dir.join("vendors.json"),
676            "Vendors",
677        );
678        write_json_safe(
679            &result.master_data.customers,
680            &md_dir.join("customers.json"),
681            "Customers",
682        );
683        write_json_safe(
684            &result.master_data.materials,
685            &md_dir.join("materials.json"),
686            "Materials",
687        );
688        write_json_safe(
689            &result.master_data.assets,
690            &md_dir.join("fixed_assets.json"),
691            "Fixed assets",
692        );
693        write_json_safe(
694            &result.master_data.employees,
695            &md_dir.join("employees.json"),
696            "Employees",
697        );
698        write_json_safe(
699            &result.master_data.cost_centers,
700            &md_dir.join("cost_centers.json"),
701            "Cost centers",
702        );
703        // v5.1: profit-centre hierarchy (segments + sub-units).
704        write_json_safe(
705            &result.master_data.profit_centers,
706            &md_dir.join("profit_centers.json"),
707            "Profit centres",
708        );
709        // v3.3.0: organizational profiles (one per company)
710        write_json_safe(
711            &result.master_data.organizational_profiles,
712            &md_dir.join("organizational_profiles.json"),
713            "Organizational profiles (v3.3.0)",
714        );
715    }
716
717    // ========================================================================
718    // Document Flows
719    // ========================================================================
720    let df_dir = output_dir.join("document_flows");
721    let flat_mode = export_layout == datasynth_config::ExportLayout::Flat;
722    if !result.document_flows.purchase_orders.is_empty()
723        || !result.document_flows.sales_orders.is_empty()
724    {
725        std::fs::create_dir_all(&df_dir)?;
726        info!("Writing document flows...");
727
728        write_json_auto(
729            &result.document_flows.purchase_orders,
730            &df_dir.join("purchase_orders.json"),
731            "Purchase orders",
732            flat_mode,
733        );
734        write_json_auto(
735            &result.document_flows.goods_receipts,
736            &df_dir.join("goods_receipts.json"),
737            "Goods receipts",
738            flat_mode,
739        );
740        write_json_auto(
741            &result.document_flows.vendor_invoices,
742            &df_dir.join("vendor_invoices.json"),
743            "Vendor invoices",
744            flat_mode,
745        );
746        write_json_auto(
747            &result.document_flows.payments,
748            &df_dir.join("payments.json"),
749            "Payments",
750            flat_mode,
751        );
752        let customer_receipts: Vec<_> = result
753            .document_flows
754            .payments
755            .iter()
756            .filter(|p| p.payment_type == PaymentType::ArReceipt)
757            .collect();
758        write_json_auto(
759            &customer_receipts,
760            &df_dir.join("customer_receipts.json"),
761            "Customer receipts",
762            flat_mode,
763        );
764        write_json_auto(
765            &result.document_flows.sales_orders,
766            &df_dir.join("sales_orders.json"),
767            "Sales orders",
768            flat_mode,
769        );
770        write_json_auto(
771            &result.document_flows.deliveries,
772            &df_dir.join("deliveries.json"),
773            "Deliveries",
774            flat_mode,
775        );
776        write_json_auto(
777            &result.document_flows.customer_invoices,
778            &df_dir.join("customer_invoices.json"),
779            "Customer invoices",
780            flat_mode,
781        );
782
783        // Document cross-references (PO→GR, GR→Invoice, Invoice→Payment, etc.).
784        // v4.4.2+: inject SDK-friendly `from_type`/`from_id`/`to_type`/`to_id`
785        // aliases so consumers that follow the graph convention see the
786        // types populated. The canonical `source_doc_*`/`target_doc_*`
787        // keys continue to emit unchanged for backwards compatibility.
788        match serde_json::to_value(&result.document_flows.document_references) {
789            Ok(mut v) => {
790                if let Some(arr) = v.as_array_mut() {
791                    for r in arr.iter_mut() {
792                        if let Some(obj) = r.as_object_mut() {
793                            if let Some(st) = obj.get("source_doc_type").cloned() {
794                                obj.entry("from_type").or_insert(st);
795                            }
796                            if let Some(si) = obj.get("source_doc_id").cloned() {
797                                obj.entry("from_id").or_insert(si);
798                            }
799                            if let Some(tt) = obj.get("target_doc_type").cloned() {
800                                obj.entry("to_type").or_insert(tt);
801                            }
802                            if let Some(ti) = obj.get("target_doc_id").cloned() {
803                                obj.entry("to_id").or_insert(ti);
804                            }
805                        }
806                    }
807                }
808                match serde_json::to_string_pretty(&v) {
809                    Ok(json) => {
810                        let path = df_dir.join("document_references.json");
811                        if let Err(e) = std::fs::write(&path, json) {
812                            warn!("Failed to write document references: {}", e);
813                        } else {
814                            info!(
815                                "  Document references written: {} records -> {}",
816                                result.document_flows.document_references.len(),
817                                path.display()
818                            );
819                        }
820                    }
821                    Err(e) => warn!("Failed to serialize document references: {}", e),
822                }
823            }
824            Err(e) => warn!("Failed to build document references Value: {}", e),
825        }
826
827        // Note: P2P/O2C chain types do not implement Serialize, so we log
828        // their counts instead. The individual documents above capture all data.
829        if !result.document_flows.p2p_chains.is_empty() {
830            info!(
831                "  P2P chains: {} (data exported via individual document files)",
832                result.document_flows.p2p_chains.len()
833            );
834        }
835        if !result.document_flows.o2c_chains.is_empty() {
836            info!(
837                "  O2C chains: {} (data exported via individual document files)",
838                result.document_flows.o2c_chains.len()
839            );
840        }
841    }
842
843    // ========================================================================
844    // Subledger
845    // ========================================================================
846    let sl_dir = output_dir.join("subledger");
847    if !result.subledger.ap_invoices.is_empty()
848        || !result.subledger.ar_invoices.is_empty()
849        || !result.subledger.fa_records.is_empty()
850        || !result.subledger.inventory_positions.is_empty()
851    {
852        std::fs::create_dir_all(&sl_dir)?;
853        info!("Writing subledger data...");
854
855        write_json_safe(
856            &result.subledger.ap_invoices,
857            &sl_dir.join("ap_invoices.json"),
858            "AP invoices",
859        );
860        write_json_safe(
861            &result.subledger.ar_invoices,
862            &sl_dir.join("ar_invoices.json"),
863            "AR invoices",
864        );
865        write_json_safe(
866            &result.subledger.fa_records,
867            &sl_dir.join("fa_records.json"),
868            "FA records",
869        );
870        write_json_safe(
871            &result.subledger.inventory_positions,
872            &sl_dir.join("inventory_positions.json"),
873            "Inventory positions",
874        );
875        write_json_safe(
876            &result.subledger.inventory_movements,
877            &sl_dir.join("inventory_movements.json"),
878            "Inventory movements",
879        );
880        write_json_safe(
881            &result.subledger.ar_aging_reports,
882            &sl_dir.join("ar_aging.json"),
883            "AR aging reports",
884        );
885        write_json_safe(
886            &result.subledger.ap_aging_reports,
887            &sl_dir.join("ap_aging.json"),
888            "AP aging reports",
889        );
890        write_json_safe(
891            &result.subledger.depreciation_runs,
892            &sl_dir.join("depreciation_runs.json"),
893            "Depreciation runs",
894        );
895        write_json_safe(
896            &result.subledger.inventory_valuations,
897            &sl_dir.join("inventory_valuation.json"),
898            "Inventory valuations",
899        );
900        // Dunning runs and letters (generated after AR aging)
901        write_json_safe(
902            &result.subledger.dunning_runs,
903            &sl_dir.join("dunning_runs.json"),
904            "Dunning runs",
905        );
906        write_json_safe(
907            &result.subledger.dunning_letters,
908            &sl_dir.join("dunning_letters.json"),
909            "Dunning letters",
910        );
911    }
912
913    // ========================================================================
914    // Audit
915    // ========================================================================
916    let audit_dir = output_dir.join("audit");
917    if !result.audit.engagements.is_empty() {
918        std::fs::create_dir_all(&audit_dir)?;
919        info!("Writing audit data...");
920
921        write_json_safe(
922            &result.audit.engagements,
923            &audit_dir.join("audit_engagements.json"),
924            "Audit engagements",
925        );
926        write_json_safe(
927            &result.audit.audit_scopes,
928            &audit_dir.join("audit_scopes.json"),
929            "Audit scopes (ISA 220 / ISA 300)",
930        );
931        write_json_safe(
932            &result.audit.workpapers,
933            &audit_dir.join("audit_workpapers.json"),
934            "Audit workpapers",
935        );
936        write_json_safe(
937            &result.audit.evidence,
938            &audit_dir.join("audit_evidence.json"),
939            "Audit evidence",
940        );
941        write_json_safe(
942            &result.audit.risk_assessments,
943            &audit_dir.join("audit_risk_assessments.json"),
944            "Audit risk assessments",
945        );
946        write_json_safe(
947            &result.audit.findings,
948            &audit_dir.join("audit_findings.json"),
949            "Audit findings",
950        );
951        write_json_safe(
952            &result.audit.judgments,
953            &audit_dir.join("audit_judgments.json"),
954            "Audit judgments",
955        );
956        write_json_safe(
957            &result.audit.confirmations,
958            &audit_dir.join("audit_confirmations.json"),
959            "Audit confirmations",
960        );
961        write_json_safe(
962            &result.audit.confirmation_responses,
963            &audit_dir.join("audit_confirmation_responses.json"),
964            "Audit confirmation responses",
965        );
966        write_json_safe(
967            &result.audit.procedure_steps,
968            &audit_dir.join("audit_procedure_steps.json"),
969            "Audit procedure steps",
970        );
971        write_json_safe(
972            &result.audit.samples,
973            &audit_dir.join("audit_samples.json"),
974            "Audit samples",
975        );
976        write_json_safe(
977            &result.audit.analytical_results,
978            &audit_dir.join("audit_analytical_results.json"),
979            "Audit analytical results",
980        );
981        write_json_safe(
982            &result.audit.ia_functions,
983            &audit_dir.join("audit_ia_functions.json"),
984            "Audit IA functions",
985        );
986        write_json_safe(
987            &result.audit.ia_reports,
988            &audit_dir.join("audit_ia_reports.json"),
989            "Audit IA reports",
990        );
991        write_json_safe(
992            &result.audit.related_parties,
993            &audit_dir.join("audit_related_parties.json"),
994            "Audit related parties",
995        );
996        write_json_safe(
997            &result.audit.related_party_transactions,
998            &audit_dir.join("audit_related_party_transactions.json"),
999            "Audit related party transactions",
1000        );
1001        // ISA 600: Group audit artefacts
1002        if !result.audit.component_auditors.is_empty() {
1003            write_json_safe(
1004                &result.audit.component_auditors,
1005                &audit_dir.join("component_auditors.json"),
1006                "Component auditors (ISA 600)",
1007            );
1008            if let Some(plan) = &result.audit.group_audit_plan {
1009                write_json_single_safe(
1010                    plan,
1011                    &audit_dir.join("group_audit_plan.json"),
1012                    "Group audit plan (ISA 600)",
1013                );
1014            }
1015            write_json_safe(
1016                &result.audit.component_instructions,
1017                &audit_dir.join("component_instructions.json"),
1018                "Component instructions (ISA 600)",
1019            );
1020            write_json_safe(
1021                &result.audit.component_reports,
1022                &audit_dir.join("component_reports.json"),
1023                "Component auditor reports (ISA 600)",
1024            );
1025        }
1026        // ISA 210: Engagement letters
1027        write_json_safe(
1028            &result.audit.engagement_letters,
1029            &audit_dir.join("engagement_letters.json"),
1030            "Engagement letters (ISA 210)",
1031        );
1032        // ISA 560 / IAS 10: Subsequent events
1033        write_json_safe(
1034            &result.audit.subsequent_events,
1035            &audit_dir.join("subsequent_events.json"),
1036            "Subsequent events (ISA 560 / IAS 10)",
1037        );
1038        // ISA 402: Service organization controls
1039        write_json_safe(
1040            &result.audit.service_organizations,
1041            &audit_dir.join("service_organizations.json"),
1042            "Service organizations (ISA 402)",
1043        );
1044        write_json_safe(
1045            &result.audit.soc_reports,
1046            &audit_dir.join("soc_reports.json"),
1047            "SOC reports (ISA 402)",
1048        );
1049        write_json_safe(
1050            &result.audit.user_entity_controls,
1051            &audit_dir.join("user_entity_controls.json"),
1052            "User entity controls (ISA 402)",
1053        );
1054
1055        // ISA 570: Going concern assessments
1056        write_json_safe(
1057            &result.audit.going_concern_assessments,
1058            &audit_dir.join("going_concern_assessments.json"),
1059            "Going concern assessments (ISA 570)",
1060        );
1061
1062        // ISA 540: Accounting estimates
1063        write_json_safe(
1064            &result.audit.accounting_estimates,
1065            &audit_dir.join("accounting_estimates.json"),
1066            "Accounting estimates (ISA 540)",
1067        );
1068
1069        // ISA 700/701/705/706: Audit opinions and Key Audit Matters.
1070        // Always write even if the vec is empty; see the always-emit block
1071        // below (the `else` branch of the `engagements.is_empty()` guard) for the case
1072        // where audit is entirely disabled — the files still appear in the
1073        // archive with `[]` so SDK consumers don't get 404s on the manifest.
1074        write_json_always(
1075            &result.audit.audit_opinions,
1076            &audit_dir.join("audit_opinions.json"),
1077            "Audit opinions (ISA 700/705/706)",
1078        );
1079        write_json_always(
1080            &result.audit.key_audit_matters,
1081            &audit_dir.join("key_audit_matters.json"),
1082            "Key Audit Matters (ISA 701)",
1083        );
1084
1085        // SOX 302 / 404
1086        if !result.audit.sox_302_certifications.is_empty() {
1087            write_json_safe(
1088                &result.audit.sox_302_certifications,
1089                &audit_dir.join("sox_302_certifications.json"),
1090                "SOX 302 certifications",
1091            );
1092            write_json_safe(
1093                &result.audit.sox_404_assessments,
1094                &audit_dir.join("sox_404_assessments.json"),
1095                "SOX 404 ICFR assessments",
1096            );
1097        }
1098
1099        // ISA 320: Materiality calculations
1100        if !result.audit.materiality_calculations.is_empty() {
1101            write_json_safe(
1102                &result.audit.materiality_calculations,
1103                &audit_dir.join("materiality_calculations.json"),
1104                "Materiality calculations (ISA 320)",
1105            );
1106        }
1107
1108        // ISA 315: Combined Risk Assessments
1109        if !result.audit.combined_risk_assessments.is_empty() {
1110            write_json_safe(
1111                &result.audit.combined_risk_assessments,
1112                &audit_dir.join("combined_risk_assessments.json"),
1113                "Combined Risk Assessments (ISA 315)",
1114            );
1115        }
1116
1117        // ISA 530: Sampling Plans and Sampled Items
1118        if !result.audit.sampling_plans.is_empty() {
1119            write_json_safe(
1120                &result.audit.sampling_plans,
1121                &audit_dir.join("sampling_plans.json"),
1122                "Sampling plans (ISA 530)",
1123            );
1124            write_json_safe(
1125                &result.audit.sampled_items,
1126                &audit_dir.join("sampled_items.json"),
1127                "Sampled items (ISA 530)",
1128            );
1129        }
1130
1131        // ISA 315: Significant Classes of Transactions (SCOTS)
1132        if !result.audit.significant_transaction_classes.is_empty() {
1133            write_json_safe(
1134                &result.audit.significant_transaction_classes,
1135                &audit_dir.join("significant_transaction_classes.json"),
1136                "Significant Classes of Transactions / SCOTS (ISA 315)",
1137            );
1138        }
1139
1140        // ISA 520: Unusual Item Markers
1141        if !result.audit.unusual_items.is_empty() {
1142            write_json_safe(
1143                &result.audit.unusual_items,
1144                &audit_dir.join("unusual_items.json"),
1145                "Unusual item flags (ISA 520)",
1146            );
1147        }
1148
1149        // ISA 520: Analytical Relationships
1150        if !result.audit.analytical_relationships.is_empty() {
1151            write_json_safe(
1152                &result.audit.analytical_relationships,
1153                &audit_dir.join("analytical_relationships.json"),
1154                "Analytical relationships (ISA 520)",
1155            );
1156        }
1157
1158        // PCAOB-ISA cross-reference mappings
1159        if !result.audit.isa_pcaob_mappings.is_empty() {
1160            write_json_safe(
1161                &result.audit.isa_pcaob_mappings,
1162                &audit_dir.join("isa_pcaob_mappings.json"),
1163                "PCAOB-ISA standard mappings",
1164            );
1165        }
1166
1167        // ISA standard reference (number, title, series for all 34 ISA standards)
1168        if !result.audit.isa_mappings.is_empty() {
1169            write_json_safe(
1170                &result.audit.isa_mappings,
1171                &audit_dir.join("isa_mappings.json"),
1172                "ISA standard reference mappings",
1173            );
1174        }
1175
1176        // FSM event trail (when audit.fsm.enabled: true)
1177        if let Some(ref event_trail) = result.audit.fsm_event_trail {
1178            if !event_trail.is_empty() {
1179                write_json_safe(
1180                    event_trail,
1181                    &audit_dir.join("fsm_event_trail.json"),
1182                    "FSM audit event trail",
1183                );
1184            }
1185        }
1186
1187        // v3.3.0: legal documents (when compliance_regulations.legal_documents.enabled)
1188        write_json_safe(
1189            &result.audit.legal_documents,
1190            &audit_dir.join("legal_documents.json"),
1191            "Legal documents (v3.3.0)",
1192        );
1193
1194        // v3.3.0: IT general controls — access logs + change records
1195        write_json_safe(
1196            &result.audit.it_controls_access_logs,
1197            &audit_dir.join("it_controls_access_logs.json"),
1198            "IT general controls — access logs (v3.3.0)",
1199        );
1200        write_json_safe(
1201            &result.audit.it_controls_change_records,
1202            &audit_dir.join("it_controls_change_records.json"),
1203            "IT general controls — change management records (v3.3.0)",
1204        );
1205    } else {
1206        // Audit phase disabled or ran with no engagements — still emit
1207        // audit_opinions.json + key_audit_matters.json so the archive
1208        // structure is consistent and SDK consumers can rely on these
1209        // files always existing. v3.1 announced these as archive-shipping
1210        // files; v3.1.1 guarantees it regardless of audit.enabled.
1211        std::fs::create_dir_all(&audit_dir)?;
1212        write_json_always(
1213            &result.audit.audit_opinions,
1214            &audit_dir.join("audit_opinions.json"),
1215            "Audit opinions (ISA 700/705/706) — empty (audit phase disabled)",
1216        );
1217        write_json_always(
1218            &result.audit.key_audit_matters,
1219            &audit_dir.join("key_audit_matters.json"),
1220            "Key Audit Matters (ISA 701) — empty (audit phase disabled)",
1221        );
1222    }
1223
1224    // ========================================================================
1225    // Banking (JSON - keep existing format for backward compat)
1226    // ========================================================================
1227    let banking_dir = output_dir.join("banking");
1228    if !result.banking.customers.is_empty() {
1229        std::fs::create_dir_all(&banking_dir)?;
1230        info!("Writing banking data...");
1231
1232        // v4.4.2: dual-key risk tier. SDK consumers inspect `risk_level`;
1233        // the struct stores it as `risk_tier` for historical reasons.
1234        // Serialize through a `serde_json::Value` so we can inject the
1235        // `risk_level` alias key on every customer row without touching
1236        // the `BankingCustomer` Serialize impl (which has 40+ fields).
1237        match serde_json::to_value(&result.banking.customers) {
1238            Ok(mut v) => {
1239                if let Some(arr) = v.as_array_mut() {
1240                    for c in arr.iter_mut() {
1241                        if let Some(obj) = c.as_object_mut() {
1242                            if let Some(rt) = obj.get("risk_tier").cloned() {
1243                                obj.entry("risk_level").or_insert(rt);
1244                            }
1245                        }
1246                    }
1247                }
1248                match serde_json::to_string_pretty(&v) {
1249                    Ok(json) => {
1250                        let path = banking_dir.join("banking_customers.json");
1251                        if let Err(e) = std::fs::write(&path, json) {
1252                            warn!("Failed to write banking_customers.json: {}", e);
1253                        } else {
1254                            info!(
1255                                "  Banking customers written: {} records -> {}",
1256                                result.banking.customers.len(),
1257                                path.display()
1258                            );
1259                        }
1260                    }
1261                    Err(e) => warn!("Failed to serialize banking customers: {}", e),
1262                }
1263            }
1264            Err(e) => warn!("Failed to build banking customers Value: {}", e),
1265        }
1266        write_json_safe(
1267            &result.banking.accounts,
1268            &banking_dir.join("banking_accounts.json"),
1269            "Banking accounts",
1270        );
1271        write_json_safe(
1272            &result.banking.transactions,
1273            &banking_dir.join("banking_transactions.json"),
1274            "Banking transactions",
1275        );
1276        write_json_safe(
1277            &result.banking.transaction_labels,
1278            &banking_dir.join("aml_transaction_labels.json"),
1279            "AML transaction labels",
1280        );
1281        write_json_safe(
1282            &result.banking.customer_labels,
1283            &banking_dir.join("aml_customer_labels.json"),
1284            "AML customer labels",
1285        );
1286        write_json_safe(
1287            &result.banking.account_labels,
1288            &banking_dir.join("aml_account_labels.json"),
1289            "AML account labels",
1290        );
1291        write_json_safe(
1292            &result.banking.relationship_labels,
1293            &banking_dir.join("aml_relationship_labels.json"),
1294            "AML relationship labels",
1295        );
1296        write_json_safe(
1297            &result.banking.narratives,
1298            &banking_dir.join("aml_narratives.json"),
1299            "AML narratives",
1300        );
1301    }
1302
1303    // ========================================================================
1304    // Sourcing (S2C)
1305    // ========================================================================
1306    let s2c_dir = output_dir.join("sourcing");
1307    if !result.sourcing.spend_analyses.is_empty() || !result.sourcing.sourcing_projects.is_empty() {
1308        std::fs::create_dir_all(&s2c_dir)?;
1309        info!("Writing sourcing (S2C) data...");
1310
1311        write_json_safe(
1312            &result.sourcing.spend_analyses,
1313            &s2c_dir.join("spend_analyses.json"),
1314            "Spend analyses",
1315        );
1316        write_json_safe(
1317            &result.sourcing.sourcing_projects,
1318            &s2c_dir.join("sourcing_projects.json"),
1319            "Sourcing projects",
1320        );
1321        write_json_safe(
1322            &result.sourcing.qualifications,
1323            &s2c_dir.join("supplier_qualifications.json"),
1324            "Supplier qualifications",
1325        );
1326        write_json_safe(
1327            &result.sourcing.rfx_events,
1328            &s2c_dir.join("rfx_events.json"),
1329            "RFx events",
1330        );
1331        write_json_safe(
1332            &result.sourcing.bids,
1333            &s2c_dir.join("supplier_bids.json"),
1334            "Supplier bids",
1335        );
1336        write_json_safe(
1337            &result.sourcing.bid_evaluations,
1338            &s2c_dir.join("bid_evaluations.json"),
1339            "Bid evaluations",
1340        );
1341        write_json_safe(
1342            &result.sourcing.contracts,
1343            &s2c_dir.join("procurement_contracts.json"),
1344            "Procurement contracts",
1345        );
1346        write_json_safe(
1347            &result.sourcing.catalog_items,
1348            &s2c_dir.join("catalog_items.json"),
1349            "Catalog items",
1350        );
1351        write_json_safe(
1352            &result.sourcing.scorecards,
1353            &s2c_dir.join("supplier_scorecards.json"),
1354            "Supplier scorecards",
1355        );
1356    }
1357
1358    // ========================================================================
1359    // Intercompany
1360    // ========================================================================
1361    let ic_dir = output_dir.join("intercompany");
1362    if result.intercompany.group_structure.is_some()
1363        || !result.intercompany.matched_pairs.is_empty()
1364    {
1365        std::fs::create_dir_all(&ic_dir)?;
1366        info!("Writing intercompany data...");
1367
1368        // Always write group structure when present (independent of IC transactions).
1369        if let Some(gs) = &result.intercompany.group_structure {
1370            write_json_single_safe(gs, &ic_dir.join("group_structure.json"), "Group structure");
1371        }
1372
1373        write_json_safe(
1374            &result.intercompany.matched_pairs,
1375            &ic_dir.join("ic_matched_pairs.json"),
1376            "IC matched pairs",
1377        );
1378        write_json_safe(
1379            &result.intercompany.seller_journal_entries,
1380            &ic_dir.join("ic_seller_journal_entries.json"),
1381            "IC seller journal entries",
1382        );
1383        write_json_safe(
1384            &result.intercompany.buyer_journal_entries,
1385            &ic_dir.join("ic_buyer_journal_entries.json"),
1386            "IC buyer journal entries",
1387        );
1388        write_json_safe(
1389            &result.intercompany.elimination_entries,
1390            &ic_dir.join("ic_elimination_entries.json"),
1391            "IC elimination entries",
1392        );
1393
1394        // NCI measurements from group structure ownership percentages
1395        if !result.intercompany.nci_measurements.is_empty() {
1396            write_json_safe(
1397                &result.intercompany.nci_measurements,
1398                &ic_dir.join("nci_measurements.json"),
1399                "NCI measurements",
1400            );
1401        }
1402    }
1403
1404    // ========================================================================
1405    // Financial Reporting
1406    // ========================================================================
1407    let fin_dir = output_dir.join("financial_reporting");
1408    if !result.financial_reporting.financial_statements.is_empty()
1409        || !result.financial_reporting.bank_reconciliations.is_empty()
1410        || !result
1411            .financial_reporting
1412            .consolidated_statements
1413            .is_empty()
1414    {
1415        std::fs::create_dir_all(&fin_dir)?;
1416        info!("Writing financial reporting data...");
1417
1418        // Legacy flat file (all standalone statements combined)
1419        write_json_safe(
1420            &result.financial_reporting.financial_statements,
1421            &fin_dir.join("financial_statements.json"),
1422            "Financial statements",
1423        );
1424
1425        // Per-entity standalone statements
1426        if !result.financial_reporting.standalone_statements.is_empty() {
1427            let standalone_dir = fin_dir.join("standalone");
1428            std::fs::create_dir_all(&standalone_dir)?;
1429            for (entity_code, stmts) in &result.financial_reporting.standalone_statements {
1430                let file_name = format!("{}_financial_statements.json", entity_code);
1431                write_json_safe(
1432                    stmts,
1433                    &standalone_dir.join(&file_name),
1434                    &format!("Standalone statements for {}", entity_code),
1435                );
1436            }
1437        }
1438
1439        // Consolidated statements + schedule
1440        if !result
1441            .financial_reporting
1442            .consolidated_statements
1443            .is_empty()
1444            || !result
1445                .financial_reporting
1446                .consolidation_schedules
1447                .is_empty()
1448        {
1449            let consolidated_dir = fin_dir.join("consolidated");
1450            std::fs::create_dir_all(&consolidated_dir)?;
1451            write_json_safe(
1452                &result.financial_reporting.consolidated_statements,
1453                &consolidated_dir.join("consolidated_financial_statements.json"),
1454                "Consolidated financial statements",
1455            );
1456            write_json_safe(
1457                &result.financial_reporting.consolidation_schedules,
1458                &consolidated_dir.join("consolidation_schedule.json"),
1459                "Consolidation schedule",
1460            );
1461        }
1462
1463        write_json_safe(
1464            &result.financial_reporting.bank_reconciliations,
1465            &fin_dir.join("bank_reconciliations.json"),
1466            "Bank reconciliations",
1467        );
1468
1469        // IFRS 8 / ASC 280 Segment Reporting
1470        if !result.financial_reporting.segment_reports.is_empty()
1471            || !result
1472                .financial_reporting
1473                .segment_reconciliations
1474                .is_empty()
1475        {
1476            let seg_dir = fin_dir.join("segment_reporting");
1477            std::fs::create_dir_all(&seg_dir)?;
1478            write_json_safe(
1479                &result.financial_reporting.segment_reports,
1480                &seg_dir.join("segment_reports.json"),
1481                "Segment reports",
1482            );
1483            write_json_safe(
1484                &result.financial_reporting.segment_reconciliations,
1485                &seg_dir.join("segment_reconciliations.json"),
1486                "Segment reconciliations",
1487            );
1488        }
1489
1490        // IAS 1 / ASC 235: Notes to financial statements
1491        write_json_safe(
1492            &result.financial_reporting.notes_to_financial_statements,
1493            &fin_dir.join("notes_to_financial_statements.json"),
1494            "Notes to financial statements",
1495        );
1496    }
1497
1498    // ========================================================================
1499    // Period-Close Trial Balances
1500    // ========================================================================
1501    //
1502    // v5.1: convert each in-memory `PeriodTrialBalance` to the
1503    // canonical `datasynth_core::models::balance::TrialBalance` before
1504    // writing.  The on-disk shape is now identical to what the group
1505    // aggregate phase loads via `tb_loader::load_entity_trial_balance`,
1506    // so the loader's v5.0 dual-shape detection (`PeriodTrialBalanceOnDisk`
1507    // → `TrialBalance` synthesis) is no longer required.
1508    if !result.financial_reporting.trial_balances.is_empty() {
1509        let pc_dir = output_dir.join("period_close");
1510        std::fs::create_dir_all(&pc_dir)?;
1511        info!(
1512            "Writing {} period-close trial balances...",
1513            result.financial_reporting.trial_balances.len()
1514        );
1515        // Pick the first JE's company_code + currency as the
1516        // canonical identifiers; the orchestrator only emits one TB
1517        // per period (gated by `if company_idx == 0` at the push
1518        // site), so all trial-balance entries belong to that company.
1519        // Fallback to safe defaults when the JE list is empty
1520        // (effectively only test fixtures).
1521        let (company_code, currency) = result
1522            .journal_entries
1523            .first()
1524            .map(|je| (je.header.company_code.as_str(), je.header.currency.as_str()))
1525            .unwrap_or(("UNKNOWN", "USD"));
1526        let canonical: Vec<datasynth_core::models::balance::TrialBalance> = result
1527            .financial_reporting
1528            .trial_balances
1529            .iter()
1530            .cloned()
1531            .map(|tb| tb.into_canonical(company_code, currency))
1532            .collect();
1533        write_json_safe(
1534            &canonical,
1535            &pc_dir.join("trial_balances.json"),
1536            "Period-close trial balances (canonical)",
1537        );
1538    }
1539
1540    // ========================================================================
1541    // Balance: Opening Balances + GL-Subledger Reconciliation
1542    // ========================================================================
1543    if !result.opening_balances.is_empty() || !result.subledger_reconciliation.is_empty() {
1544        let balance_dir = output_dir.join("balance");
1545        std::fs::create_dir_all(&balance_dir)?;
1546        info!("Writing balance data...");
1547
1548        write_json_safe(
1549            &result.opening_balances,
1550            &balance_dir.join("opening_balances.json"),
1551            "Opening balances",
1552        );
1553        write_json_safe(
1554            &result.subledger_reconciliation,
1555            &balance_dir.join("subledger_reconciliation.json"),
1556            "Subledger reconciliation",
1557        );
1558    }
1559
1560    // ========================================================================
1561    // HR (Payroll, Time Entries, Expense Reports, Benefit Enrollments, Pensions, Stock Compensation, Employee Change History)
1562    // ========================================================================
1563    let hr_dir = output_dir.join("hr");
1564    if !result.hr.payroll_runs.is_empty()
1565        || !result.hr.time_entries.is_empty()
1566        || !result.hr.expense_reports.is_empty()
1567        || !result.hr.benefit_enrollments.is_empty()
1568        || !result.hr.pension_plans.is_empty()
1569        || !result.hr.stock_grants.is_empty()
1570        || !result.master_data.employee_change_history.is_empty()
1571    {
1572        std::fs::create_dir_all(&hr_dir)?;
1573        info!("Writing HR data...");
1574
1575        write_json_safe(
1576            &result.hr.payroll_runs,
1577            &hr_dir.join("payroll_runs.json"),
1578            "Payroll runs",
1579        );
1580        write_json_safe(
1581            &result.hr.payroll_line_items,
1582            &hr_dir.join("payroll_line_items.json"),
1583            "Payroll line items",
1584        );
1585        write_json_safe(
1586            &result.hr.time_entries,
1587            &hr_dir.join("time_entries.json"),
1588            "Time entries",
1589        );
1590        write_json_safe(
1591            &result.hr.expense_reports,
1592            &hr_dir.join("expense_reports.json"),
1593            "Expense reports",
1594        );
1595        write_json_safe(
1596            &result.hr.benefit_enrollments,
1597            &hr_dir.join("benefit_enrollments.json"),
1598            "Benefit enrollments",
1599        );
1600        write_json_safe(
1601            &result.hr.pension_plans,
1602            &hr_dir.join("pension_plans.json"),
1603            "Pension plans",
1604        );
1605        write_json_safe(
1606            &result.hr.pension_obligations,
1607            &hr_dir.join("pension_obligations.json"),
1608            "Pension obligations",
1609        );
1610        write_json_safe(
1611            &result.hr.pension_plan_assets,
1612            &hr_dir.join("plan_assets.json"),
1613            "Plan assets",
1614        );
1615        write_json_safe(
1616            &result.hr.pension_disclosures,
1617            &hr_dir.join("pension_disclosures.json"),
1618            "Pension disclosures",
1619        );
1620        write_json_safe(
1621            &result.hr.stock_grants,
1622            &hr_dir.join("stock_grants.json"),
1623            "Stock grants",
1624        );
1625        write_json_safe(
1626            &result.hr.stock_comp_expenses,
1627            &hr_dir.join("stock_comp_expense.json"),
1628            "Stock comp expense",
1629        );
1630        write_json_safe(
1631            &result.master_data.employee_change_history,
1632            &hr_dir.join("employee_change_history.json"),
1633            "Employee change history",
1634        );
1635    }
1636
1637    // ========================================================================
1638    // Manufacturing
1639    // ========================================================================
1640    let mfg_dir = output_dir.join("manufacturing");
1641    if !result.manufacturing.production_orders.is_empty()
1642        || !result.manufacturing.quality_inspections.is_empty()
1643        || !result.manufacturing.cycle_counts.is_empty()
1644        || !result.manufacturing.bom_components.is_empty()
1645        || !result.manufacturing.inventory_movements.is_empty()
1646    {
1647        std::fs::create_dir_all(&mfg_dir)?;
1648        info!("Writing manufacturing data...");
1649
1650        write_json_safe(
1651            &result.manufacturing.production_orders,
1652            &mfg_dir.join("production_orders.json"),
1653            "Production orders",
1654        );
1655        write_json_safe(
1656            &result.manufacturing.quality_inspections,
1657            &mfg_dir.join("quality_inspections.json"),
1658            "Quality inspections",
1659        );
1660        write_json_safe(
1661            &result.manufacturing.cycle_counts,
1662            &mfg_dir.join("cycle_counts.json"),
1663            "Cycle counts",
1664        );
1665        write_json_safe(
1666            &result.manufacturing.bom_components,
1667            &mfg_dir.join("bom_components.json"),
1668            "BOM components",
1669        );
1670        write_json_safe(
1671            &result.manufacturing.inventory_movements,
1672            &mfg_dir.join("inventory_movements.json"),
1673            "Inventory movements",
1674        );
1675    }
1676
1677    // ========================================================================
1678    // Sales, KPIs, Budgets
1679    // ========================================================================
1680    let sales_dir = output_dir.join("sales_kpi_budgets");
1681    if !result.sales_kpi_budgets.sales_quotes.is_empty()
1682        || !result.sales_kpi_budgets.kpis.is_empty()
1683        || !result.sales_kpi_budgets.budgets.is_empty()
1684    {
1685        std::fs::create_dir_all(&sales_dir)?;
1686        info!("Writing sales, KPI, and budget data...");
1687
1688        write_json_safe(
1689            &result.sales_kpi_budgets.sales_quotes,
1690            &sales_dir.join("sales_quotes.json"),
1691            "Sales quotes",
1692        );
1693        write_json_safe(
1694            &result.sales_kpi_budgets.kpis,
1695            &sales_dir.join("management_kpis.json"),
1696            "Management KPIs",
1697        );
1698        write_json_safe(
1699            &result.sales_kpi_budgets.budgets,
1700            &sales_dir.join("budgets.json"),
1701            "Budgets",
1702        );
1703    }
1704
1705    // ========================================================================
1706    // Tax
1707    // ========================================================================
1708    let tax_dir = output_dir.join("tax");
1709    if !result.tax.jurisdictions.is_empty()
1710        || !result.tax.codes.is_empty()
1711        || !result.tax.tax_provisions.is_empty()
1712    {
1713        std::fs::create_dir_all(&tax_dir)?;
1714        info!("Writing tax data...");
1715
1716        write_json_safe(
1717            &result.tax.jurisdictions,
1718            &tax_dir.join("tax_jurisdictions.json"),
1719            "Tax jurisdictions",
1720        );
1721        write_json_safe(
1722            &result.tax.codes,
1723            &tax_dir.join("tax_codes.json"),
1724            "Tax codes",
1725        );
1726        write_json_safe(
1727            &result.tax.tax_provisions,
1728            &tax_dir.join("tax_provisions.json"),
1729            "Tax provisions",
1730        );
1731        write_json_safe(
1732            &result.tax.tax_lines,
1733            &tax_dir.join("tax_lines.json"),
1734            "Tax lines",
1735        );
1736        write_json_safe(
1737            &result.tax.tax_returns,
1738            &tax_dir.join("tax_returns.json"),
1739            "Tax returns",
1740        );
1741        write_json_safe(
1742            &result.tax.withholding_records,
1743            &tax_dir.join("withholding_records.json"),
1744            "Withholding tax records",
1745        );
1746        if !result.tax.tax_anomaly_labels.is_empty() {
1747            write_json_safe(
1748                &result.tax.tax_anomaly_labels,
1749                &tax_dir.join("tax_anomaly_labels.json"),
1750                "Tax anomaly labels",
1751            );
1752        }
1753        // Deferred tax engine output (IAS 12 / ASC 740)
1754        if !result.tax.deferred_tax.temporary_differences.is_empty() {
1755            write_json_safe(
1756                &result.tax.deferred_tax.temporary_differences,
1757                &tax_dir.join("temporary_differences.json"),
1758                "Temporary differences",
1759            );
1760            write_json_safe(
1761                &result.tax.deferred_tax.etr_reconciliations,
1762                &tax_dir.join("etr_reconciliation.json"),
1763                "ETR reconciliation",
1764            );
1765            write_json_safe(
1766                &result.tax.deferred_tax.rollforwards,
1767                &tax_dir.join("deferred_tax_rollforward.json"),
1768                "Deferred tax rollforward",
1769            );
1770            write_json_safe(
1771                &result.tax.deferred_tax.journal_entries,
1772                &tax_dir.join("deferred_tax_journal_entries.json"),
1773                "Deferred tax journal entries",
1774            );
1775        }
1776    }
1777
1778    // ========================================================================
1779    // ESG
1780    // ========================================================================
1781    let esg_dir = output_dir.join("esg");
1782    if !result.esg.emissions.is_empty()
1783        || !result.esg.energy.is_empty()
1784        || !result.esg.diversity.is_empty()
1785        || !result.esg.governance.is_empty()
1786    {
1787        std::fs::create_dir_all(&esg_dir)?;
1788        info!("Writing ESG data...");
1789
1790        write_json_safe(
1791            &result.esg.emissions,
1792            &esg_dir.join("emission_records.json"),
1793            "Emission records",
1794        );
1795        write_json_safe(
1796            &result.esg.energy,
1797            &esg_dir.join("energy_consumption.json"),
1798            "Energy consumption",
1799        );
1800        write_json_safe(
1801            &result.esg.water,
1802            &esg_dir.join("water_usage.json"),
1803            "Water usage",
1804        );
1805        write_json_safe(
1806            &result.esg.waste,
1807            &esg_dir.join("waste_records.json"),
1808            "Waste records",
1809        );
1810        write_json_safe(
1811            &result.esg.diversity,
1812            &esg_dir.join("workforce_diversity.json"),
1813            "Workforce diversity",
1814        );
1815        write_json_safe(
1816            &result.esg.pay_equity,
1817            &esg_dir.join("pay_equity.json"),
1818            "Pay equity",
1819        );
1820        write_json_safe(
1821            &result.esg.safety_incidents,
1822            &esg_dir.join("safety_incidents.json"),
1823            "Safety incidents",
1824        );
1825        write_json_safe(
1826            &result.esg.safety_metrics,
1827            &esg_dir.join("safety_metrics.json"),
1828            "Safety metrics",
1829        );
1830        write_json_safe(
1831            &result.esg.governance,
1832            &esg_dir.join("governance_metrics.json"),
1833            "Governance metrics",
1834        );
1835        write_json_safe(
1836            &result.esg.supplier_assessments,
1837            &esg_dir.join("supplier_esg_assessments.json"),
1838            "Supplier ESG assessments",
1839        );
1840        write_json_safe(
1841            &result.esg.materiality,
1842            &esg_dir.join("materiality_assessments.json"),
1843            "Materiality assessments",
1844        );
1845        write_json_safe(
1846            &result.esg.disclosures,
1847            &esg_dir.join("esg_disclosures.json"),
1848            "ESG disclosures",
1849        );
1850        write_json_safe(
1851            &result.esg.climate_scenarios,
1852            &esg_dir.join("climate_scenarios.json"),
1853            "Climate scenarios",
1854        );
1855        write_json_safe(
1856            &result.esg.anomaly_labels,
1857            &esg_dir.join("esg_anomaly_labels.json"),
1858            "ESG anomaly labels",
1859        );
1860    }
1861
1862    // ========================================================================
1863    // Process Mining (OCPM)
1864    // ========================================================================
1865    if let Some(ref event_log) = result.ocpm.event_log {
1866        if !event_log.events.is_empty() || !event_log.objects.is_empty() {
1867            let pm_dir = output_dir.join("process_mining");
1868            std::fs::create_dir_all(&pm_dir)?;
1869            info!("Writing process mining (OCPM) data...");
1870
1871            // Write the full OCEL 2.0 event log. v4.4.2+ patches every
1872            // `object_refs[*].object_type_id` with a companion
1873            // `object_type` key, matching the OCEL 2.0 spec and SDK
1874            // consumer expectations that previously saw `object_type`
1875            // arrive as null. See `add_ocel_object_type_alias` below.
1876            match serde_json::to_value(event_log) {
1877                Ok(mut v) => {
1878                    add_ocel_object_type_alias(&mut v);
1879                    match serde_json::to_string_pretty(&v) {
1880                        Ok(json) => {
1881                            if let Err(e) = std::fs::write(pm_dir.join("event_log.json"), json) {
1882                                warn!("Failed to write OCPM event log: {}", e);
1883                            } else {
1884                                info!(
1885                                    "  Event log written: {} events, {} objects",
1886                                    result.ocpm.event_count, result.ocpm.object_count
1887                                );
1888                            }
1889                        }
1890                        Err(e) => warn!("Failed to serialize OCPM event log: {}", e),
1891                    }
1892                }
1893                Err(e) => warn!("Failed to build OCPM event log Value: {}", e),
1894            }
1895
1896            // Write events separately for easy consumption
1897            if !event_log.events.is_empty() {
1898                match serde_json::to_string_pretty(&event_log.events) {
1899                    Ok(json) => {
1900                        if let Err(e) = std::fs::write(pm_dir.join("events.json"), json) {
1901                            warn!("Failed to write OCPM events: {}", e);
1902                        } else {
1903                            info!("  Events written: {} records", event_log.events.len());
1904                        }
1905                    }
1906                    Err(e) => warn!("Failed to serialize OCPM events: {}", e),
1907                }
1908            }
1909
1910            // Write objects separately for easy consumption
1911            if !event_log.objects.is_empty() {
1912                let objects: Vec<&_> = event_log.objects.iter().collect();
1913                match serde_json::to_string_pretty(&objects) {
1914                    Ok(json) => {
1915                        if let Err(e) = std::fs::write(pm_dir.join("objects.json"), json) {
1916                            warn!("Failed to write OCPM objects: {}", e);
1917                        } else {
1918                            info!("  Objects written: {} records", event_log.objects.len());
1919                        }
1920                    }
1921                    Err(e) => warn!("Failed to serialize OCPM objects: {}", e),
1922                }
1923            }
1924
1925            // Write process variants if any were computed
1926            if !event_log.variants.is_empty() {
1927                let variants: Vec<&_> = event_log.variants.values().collect();
1928                match serde_json::to_string_pretty(&variants) {
1929                    Ok(json) => {
1930                        if let Err(e) = std::fs::write(pm_dir.join("process_variants.json"), json) {
1931                            warn!("Failed to write process variants: {}", e);
1932                        } else {
1933                            info!(
1934                                "  Process variants written: {} variants",
1935                                event_log.variants.len()
1936                            );
1937                        }
1938                    }
1939                    Err(e) => warn!("Failed to serialize process variants: {}", e),
1940                }
1941            }
1942        }
1943    }
1944
1945    // ========================================================================
1946    // Chart of Accounts
1947    // ========================================================================
1948    // Primary file: flat array of accounts (shape stable since v3.x —
1949    // consumers iterate over it).
1950    match serde_json::to_string_pretty(&result.chart_of_accounts.accounts) {
1951        Ok(json) => {
1952            if let Err(e) = std::fs::write(output_dir.join("chart_of_accounts.json"), json) {
1953                warn!("Failed to write chart of accounts: {}", e);
1954            } else {
1955                info!("  Chart of accounts written");
1956            }
1957        }
1958        Err(e) => warn!("Failed to serialize chart of accounts: {}", e),
1959    }
1960    // v4.4.1 — companion metadata file so SDK consumers can read the
1961    // accounting framework + complexity + ID without having to infer
1962    // them from each account row. The SDK team flagged
1963    // `CoA.accounting_framework` arriving as null in v4.1.x; the field
1964    // didn't exist at all until v4.4.1.
1965    let coa_meta = serde_json::json!({
1966        "coa_id": result.chart_of_accounts.coa_id,
1967        "name": result.chart_of_accounts.name,
1968        "country": result.chart_of_accounts.country,
1969        "industry": result.chart_of_accounts.industry,
1970        "complexity": result.chart_of_accounts.complexity,
1971        "account_format": result.chart_of_accounts.account_format,
1972        "accounting_framework": result.chart_of_accounts.accounting_framework,
1973        "account_count": result.chart_of_accounts.accounts.len(),
1974    });
1975    match serde_json::to_string_pretty(&coa_meta) {
1976        Ok(json) => {
1977            if let Err(e) = std::fs::write(output_dir.join("chart_of_accounts_meta.json"), json) {
1978                warn!("Failed to write CoA metadata: {}", e);
1979            } else {
1980                info!(
1981                    "  Chart of accounts metadata written (accounting_framework: {:?})",
1982                    result.chart_of_accounts.accounting_framework
1983                );
1984            }
1985        }
1986        Err(e) => warn!("Failed to serialize CoA metadata: {}", e),
1987    }
1988
1989    // ========================================================================
1990    // Balance Validation Summary
1991    // ========================================================================
1992    if result.balance_validation.validated {
1993        match serde_json::to_string_pretty(&BalanceValidationSummary::from(
1994            &result.balance_validation,
1995        )) {
1996            Ok(json) => {
1997                if let Err(e) = std::fs::write(output_dir.join("balance_validation.json"), json) {
1998                    warn!("Failed to write balance validation: {}", e);
1999                } else {
2000                    info!("  Balance validation summary written");
2001                }
2002            }
2003            Err(e) => warn!("Failed to serialize balance validation: {}", e),
2004        }
2005    }
2006
2007    // ========================================================================
2008    // Data Quality Statistics (now serializable directly via Serialize derives)
2009    // ========================================================================
2010    {
2011        match serde_json::to_string_pretty(&result.data_quality_stats) {
2012            Ok(json) => {
2013                if let Err(e) = std::fs::write(output_dir.join("data_quality_stats.json"), json) {
2014                    warn!("Failed to write data quality stats: {}", e);
2015                } else {
2016                    info!("  Data quality stats written (full detail)");
2017                }
2018            }
2019            Err(e) => warn!("Failed to serialize data quality stats: {}", e),
2020        }
2021    }
2022
2023    // ========================================================================
2024    // v3.3.0: Analytics-metadata phase outputs (prior year, industry
2025    // benchmarks, management reports, drift events).
2026    // ========================================================================
2027    {
2028        let am = &result.analytics_metadata;
2029        if !am.prior_year_comparatives.is_empty()
2030            || !am.industry_benchmarks.is_empty()
2031            || !am.management_reports.is_empty()
2032            || !am.drift_events.is_empty()
2033        {
2034            let analytics_dir = output_dir.join("analytics");
2035            std::fs::create_dir_all(&analytics_dir)?;
2036            write_json_safe(
2037                &am.prior_year_comparatives,
2038                &analytics_dir.join("prior_year_comparatives.json"),
2039                "Prior-year comparatives (v3.3.0)",
2040            );
2041            write_json_safe(
2042                &am.industry_benchmarks,
2043                &analytics_dir.join("industry_benchmarks.json"),
2044                "Industry benchmarks (v3.3.0)",
2045            );
2046            write_json_safe(
2047                &am.management_reports,
2048                &analytics_dir.join("management_reports.json"),
2049                "Management reports (v3.3.0)",
2050            );
2051            write_json_safe(
2052                &am.drift_events,
2053                &analytics_dir.join("drift_events.json"),
2054                "Drift event labels (v3.3.0)",
2055            );
2056        }
2057    }
2058
2059    // ========================================================================
2060    // Pre-built Analytics (Benford, amount distribution, process variants, banking evaluation)
2061    // ========================================================================
2062    {
2063        let analytics_dir = output_dir.join("analytics");
2064
2065        // Collect non-zero amounts from journal entry lines
2066        let amounts: Vec<_> = result
2067            .journal_entries
2068            .iter()
2069            .flat_map(|je| je.lines.iter())
2070            .flat_map(|line| {
2071                let d = (!line.debit_amount.is_zero()).then_some(line.debit_amount);
2072                let c = (!line.credit_amount.is_zero()).then_some(line.credit_amount);
2073                d.into_iter().chain(c)
2074            })
2075            .collect();
2076
2077        if amounts.len() >= 10 {
2078            std::fs::create_dir_all(&analytics_dir)?;
2079            info!("Writing pre-built analytics ({} amounts)...", amounts.len());
2080
2081            // Benford's Law analysis
2082            let benford_analyzer = datasynth_eval::BenfordAnalyzer::default();
2083            match benford_analyzer.analyze(&amounts) {
2084                Ok(ref benford_result) => {
2085                    if let Ok(json) = serde_json::to_string_pretty(benford_result) {
2086                        if let Err(e) =
2087                            std::fs::write(analytics_dir.join("benford_analysis.json"), json)
2088                        {
2089                            warn!("Failed to write Benford analysis: {}", e);
2090                        } else {
2091                            info!(
2092                                "  Benford analysis written (conformity: {:?}, MAD: {:.4})",
2093                                benford_result.conformity, benford_result.mad
2094                            );
2095                        }
2096                    }
2097                }
2098                Err(e) => warn!("Benford analysis skipped: {}", e),
2099            }
2100
2101            // Amount distribution analysis
2102            let amount_analyzer = datasynth_eval::AmountDistributionAnalyzer::new();
2103            match amount_analyzer.analyze(&amounts) {
2104                Ok(ref dist_result) => {
2105                    if let Ok(json) = serde_json::to_string_pretty(dist_result) {
2106                        if let Err(e) =
2107                            std::fs::write(analytics_dir.join("amount_distribution.json"), json)
2108                        {
2109                            warn!("Failed to write amount distribution: {}", e);
2110                        } else {
2111                            info!(
2112                                "  Amount distribution written (skewness: {:.2}, kurtosis: {:.2})",
2113                                dist_result.skewness, dist_result.kurtosis
2114                            );
2115                        }
2116                    }
2117                }
2118                Err(e) => warn!("Amount distribution analysis skipped: {}", e),
2119            }
2120        }
2121
2122        // Process variant summary (from OCPM event log).
2123        //
2124        // v3.1.1 — always emit the file when an event_log exists. When the
2125        // event_log has no pre-computed `variants` map (older OCPM phases
2126        // didn't populate it), derive variants on the fly from the raw
2127        // events so SDK consumers see `analytics/process_variant_summary.json`
2128        // in every archive rather than `null`. Without this, the v3.1
2129        // claim that the file exists was only true when OCPM happened to
2130        // populate its variants map.
2131        if let Some(ref event_log) = result.ocpm.event_log {
2132            std::fs::create_dir_all(&analytics_dir)?;
2133            let variant_data: Vec<datasynth_eval::VariantData> = if !event_log.variants.is_empty() {
2134                event_log
2135                    .variants
2136                    .values()
2137                    .map(|v| datasynth_eval::VariantData {
2138                        variant_id: v.variant_id.clone(),
2139                        case_count: v.frequency as usize,
2140                        is_happy_path: v.is_happy_path,
2141                    })
2142                    .collect()
2143            } else {
2144                // Fallback: derive variants from raw events by case_id.
2145                // Each case's activity sequence (by activity_id) defines a
2146                // variant; cases with the same sequence collapse into one
2147                // variant. Events without a case_id are skipped since they
2148                // can't be grouped into a process instance.
2149                use std::collections::HashMap;
2150                // Key by case_id's string form to avoid pulling the uuid
2151                // crate into the output writer's dependency graph.
2152                let mut per_case: HashMap<String, Vec<String>> = HashMap::new();
2153                for ev in &event_log.events {
2154                    if let Some(case_id) = ev.case_id {
2155                        per_case
2156                            .entry(case_id.to_string())
2157                            .or_default()
2158                            .push(ev.activity_id.clone());
2159                    }
2160                }
2161                let mut variant_counts: HashMap<Vec<String>, usize> = HashMap::new();
2162                for activities in per_case.into_values() {
2163                    *variant_counts.entry(activities).or_insert(0) += 1;
2164                }
2165                // Happy path heuristic: the highest-frequency variant (all variants tied for the maximum are flagged).
2166                let max_count = variant_counts.values().copied().max().unwrap_or(0);
2167                variant_counts
2168                    .into_iter()
2169                    .enumerate()
2170                    .map(|(i, (seq, count))| datasynth_eval::VariantData {
2171                        variant_id: format!("V{i:04}:{}", seq.join("->")),
2172                        case_count: count,
2173                        is_happy_path: count == max_count && max_count > 0,
2174                    })
2175                    .collect()
2176            };
2177
2178            let variant_analyzer = datasynth_eval::VariantAnalyzer::new();
2179            match variant_analyzer.analyze(&variant_data) {
2180                Ok(ref variant_result) => {
2181                    if let Ok(json) = serde_json::to_string_pretty(variant_result) {
2182                        if let Err(e) =
2183                            std::fs::write(analytics_dir.join("process_variant_summary.json"), json)
2184                        {
2185                            warn!("Failed to write variant summary: {}", e);
2186                        } else {
2187                            info!(
2188                                "  Process variant summary written ({} variants, entropy: {:.2})",
2189                                variant_result.variant_count, variant_result.variant_entropy
2190                            );
2191                        }
2192                    }
2193                }
2194                Err(e) => {
2195                    // Even on analyzer error, emit a minimal JSON placeholder
2196                    // so the file always exists in the archive.
2197                    warn!("Variant analysis failed: {}; emitting empty summary", e);
2198                    let placeholder = serde_json::json!({
2199                        "variant_count": 0,
2200                        "variant_entropy": null,
2201                        "happy_path_concentration": null,
2202                        "top_variants": [],
2203                        "passes": false,
2204                        "issues": [format!("analyzer error: {e}")],
2205                    });
2206                    if let Ok(json) = serde_json::to_string_pretty(&placeholder) {
2207                        let _ = std::fs::write(
2208                            analytics_dir.join("process_variant_summary.json"),
2209                            json,
2210                        );
2211                    }
2212                }
2213            }
2214        }
2215
2216        // Banking evaluation (KYC completeness + AML detectability).
2217        // Matches the payload served by /v1/jobs/{id}/analytics so
2218        // archive-mode consumers see the same four files the endpoint returns.
2219        if !result.banking.customers.is_empty() {
2220            use datasynth_core::models::banking::BankingCustomerType;
2221            use datasynth_eval::banking::{
2222                AmlDetectabilityAnalyzer, AmlTransactionData, BankingEvaluation,
2223                KycCompletenessAnalyzer, KycProfileData, TypologyData,
2224            };
2225            use std::collections::HashMap;
2226            std::fs::create_dir_all(&analytics_dir)?;
2227
2228            let kyc_data: Vec<KycProfileData> = result
2229                .banking
2230                .customers
2231                .iter()
2232                .map(|c| KycProfileData {
2233                    profile_id: c.customer_id.to_string(),
2234                    has_name: true,
2235                    has_dob: c.date_of_birth.is_some(),
2236                    has_address: c.address_line1.is_some(),
2237                    has_id_document: c.national_id.is_some() || c.passport_number.is_some(),
2238                    has_risk_rating: true,
2239                    has_beneficial_owner: !c.beneficial_owners.is_empty(),
2240                    is_entity: c.customer_type == BankingCustomerType::Business,
2241                    is_verified: c.kyc_truthful,
2242                })
2243                .collect();
2244
2245            let mut banking_eval = BankingEvaluation::new();
2246            if let Ok(kyc_res) = KycCompletenessAnalyzer::new().analyze(&kyc_data) {
2247                banking_eval.kyc = Some(kyc_res);
2248            }
2249
2250            let suspicious: Vec<&_> = result
2251                .banking
2252                .transactions
2253                .iter()
2254                .filter(|t| t.is_suspicious)
2255                .collect();
2256            if !suspicious.is_empty() {
2257                // Use AmlTypology::canonical_name() so the evaluator's
2258                // exact-string match against EXPECTED_TYPOLOGIES succeeds.
2259                // Prior to v3.1.1 we used `format!("{:?}", r)` (Debug /
2260                // PascalCase) which never matched the lowercase expected
2261                // names and produced "typology_coverage = 0.000" in every
2262                // run regardless of actual typology injection.
2263                let aml_data: Vec<AmlTransactionData> = suspicious
2264                    .iter()
2265                    .map(|t| AmlTransactionData {
2266                        transaction_id: t.transaction_id.to_string(),
2267                        typology: t
2268                            .suspicion_reason
2269                            .as_ref()
2270                            .map(|r| r.canonical_name().to_string())
2271                            .unwrap_or_default(),
2272                        case_id: t.case_id.clone().unwrap_or_default(),
2273                        amount: t.amount.try_into().unwrap_or(0.0),
2274                        is_flagged: t.is_suspicious,
2275                    })
2276                    .collect();
2277
2278                let mut typology_map: HashMap<String, (usize, HashMap<String, bool>)> =
2279                    HashMap::new();
2280                for txn in &aml_data {
2281                    if !txn.typology.is_empty() {
2282                        let entry = typology_map
2283                            .entry(txn.typology.clone())
2284                            .or_insert_with(|| (0, HashMap::new()));
2285                        entry.0 += 1;
2286                        entry.1.insert(txn.case_id.clone(), true);
2287                    }
2288                }
2289                let typology_data: Vec<TypologyData> = typology_map
2290                    .iter()
2291                    .map(|(name, (count, cases))| TypologyData {
2292                        name: name.clone(),
2293                        scenario_count: *count,
2294                        case_ids_consistent: cases.len() <= *count,
2295                    })
2296                    .collect();
2297
2298                if let Ok(aml_res) =
2299                    AmlDetectabilityAnalyzer::new().analyze(&aml_data, &typology_data)
2300                {
2301                    banking_eval.aml = Some(aml_res);
2302                }
2303            }
2304            banking_eval.check_thresholds();
2305
2306            match serde_json::to_string_pretty(&banking_eval) {
2307                Ok(json) => {
2308                    if let Err(e) =
2309                        std::fs::write(analytics_dir.join("banking_evaluation.json"), json)
2310                    {
2311                        warn!("Failed to write banking evaluation: {}", e);
2312                    } else {
2313                        info!(
2314                            "  Banking evaluation written ({} profiles, {} issues, passes={})",
2315                            result.banking.customers.len(),
2316                            banking_eval.issues.len(),
2317                            banking_eval.passes
2318                        );
2319                    }
2320                }
2321                Err(e) => warn!("Failed to serialize banking evaluation: {}", e),
2322            }
2323        }
2324    }
2325
2326    // ========================================================================
2327    // Data Quality Issue Records + Quality Labels
2328    // ========================================================================
2329    if !result.quality_issues.is_empty() {
2330        let labels_dir = output_dir.join("labels");
2331        std::fs::create_dir_all(&labels_dir)?;
2332        info!("Writing data quality issue records...");
2333        write_json_safe(
2334            &result.quality_issues,
2335            &labels_dir.join("quality_issues.json"),
2336            "Data quality issues",
2337        );
2338
2339        // Derive quality_labels.json from quality_issues: maps each QualityIssue
2340        // to a QualityIssueLabel with the corresponding LabeledIssueType and severity.
2341        use datasynth_generators::{
2342            LabeledIssueType, QualityIssueLabel, QualityIssueType, QualityLabels,
2343        };
2344        let mut quality_labels = QualityLabels::with_capacity(result.quality_issues.len());
2345        for issue in &result.quality_issues {
2346            let labeled_type = match issue.issue_type {
2347                QualityIssueType::MissingValue => LabeledIssueType::MissingValue,
2348                QualityIssueType::Typo => LabeledIssueType::Typo,
2349                QualityIssueType::DateFormatVariation
2350                | QualityIssueType::AmountFormatVariation
2351                | QualityIssueType::IdentifierFormatVariation
2352                | QualityIssueType::TextFormatVariation => LabeledIssueType::FormatVariation,
2353                QualityIssueType::ExactDuplicate
2354                | QualityIssueType::NearDuplicate
2355                | QualityIssueType::FuzzyDuplicate => LabeledIssueType::Duplicate,
2356                QualityIssueType::EncodingIssue => LabeledIssueType::EncodingIssue,
2357            };
2358            let mut label = QualityIssueLabel::new(
2359                labeled_type,
2360                issue.record_id.clone(),
2361                issue.field.clone().unwrap_or_else(|| "_record".to_string()),
2362                "data_quality_injector",
2363            );
2364            if let Some(ref orig) = issue.original_value {
2365                label = label.with_original(orig.clone());
2366            }
2367            if let Some(ref modified) = issue.modified_value {
2368                label = label.with_modified(modified.clone());
2369            }
2370            quality_labels.add(label);
2371        }
2372        if let Ok(json) = serde_json::to_string_pretty(&quality_labels) {
2373            if let Err(e) = std::fs::write(labels_dir.join("quality_labels.json"), json.as_bytes())
2374            {
2375                warn!("Failed to write quality labels: {}", e);
2376            } else {
2377                info!(
2378                    "  Quality labels written: {} labels -> labels/quality_labels.json",
2379                    quality_labels.len()
2380                );
2381            }
2382        }
2383    }
2384
2385    // ========================================================================
2386    // Internal Controls
2387    // ========================================================================
2388    if !result.internal_controls.is_empty() || !result.sod_violations.is_empty() {
2389        let ctrl_dir = output_dir.join("internal_controls");
2390        std::fs::create_dir_all(&ctrl_dir)?;
2391        info!("Writing internal controls data...");
2392
2393        write_json_safe(
2394            &result.internal_controls,
2395            &ctrl_dir.join("internal_controls.json"),
2396            "Internal controls",
2397        );
2398        // SoD violations extracted from control-annotated journal entries
2399        write_json_safe(
2400            &result.sod_violations,
2401            &ctrl_dir.join("sod_violations.json"),
2402            "SoD violations",
2403        );
2404
2405        // SoD conflict pairs, SoD rules, control mappings, and COSO control mapping
2406        // are static reference data — export via ControlExporter regardless of whether
2407        // individual violations were generated so the master catalog is always present.
2408        let exporter = datasynth_output::ControlExporter::new(&ctrl_dir);
2409        match exporter.export_standard() {
2410            Ok(summary) => {
2411                info!(
2412                    "  Control master data written: {} controls, {} SoD conflicts, {} SoD rules, {} COSO mappings, {} account mappings",
2413                    summary.controls_count,
2414                    summary.sod_conflicts_count,
2415                    summary.sod_rules_count,
2416                    summary.coso_mappings_count,
2417                    summary.account_mappings_count,
2418                );
2419            }
2420            Err(e) => warn!("Failed to write control master data: {}", e),
2421        }
2422    }
2423
2424    // ========================================================================
2425    // Accounting Standards
2426    // ========================================================================
2427    if !result.accounting_standards.contracts.is_empty()
2428        || !result.accounting_standards.impairment_tests.is_empty()
2429        || !result.accounting_standards.business_combinations.is_empty()
2430        || !result.accounting_standards.ecl_models.is_empty()
2431        || !result.accounting_standards.provisions.is_empty()
2432        || !result
2433            .accounting_standards
2434            .currency_translation_results
2435            .is_empty()
2436    {
2437        let acct_dir = output_dir.join("accounting_standards");
2438        std::fs::create_dir_all(&acct_dir)?;
2439        info!("Writing accounting standards data...");
2440
2441        write_json_safe(
2442            &result.accounting_standards.contracts,
2443            &acct_dir.join("customer_contracts.json"),
2444            "Customer contracts",
2445        );
2446        write_json_safe(
2447            &result.accounting_standards.impairment_tests,
2448            &acct_dir.join("impairment_tests.json"),
2449            "Impairment tests",
2450        );
2451        write_json_safe(
2452            &result.accounting_standards.business_combinations,
2453            &acct_dir.join("business_combinations.json"),
2454            "Business combinations",
2455        );
2456        write_json_safe(
2457            &result
2458                .accounting_standards
2459                .business_combination_journal_entries,
2460            &acct_dir.join("business_combination_journal_entries.json"),
2461            "Business combination journal entries",
2462        );
2463        write_json_safe(
2464            &result.accounting_standards.ecl_models,
2465            &acct_dir.join("ecl_models.json"),
2466            "ECL models",
2467        );
2468        write_json_safe(
2469            &result.accounting_standards.ecl_provision_movements,
2470            &acct_dir.join("ecl_provision_movements.json"),
2471            "ECL provision movements",
2472        );
2473        write_json_safe(
2474            &result.accounting_standards.ecl_journal_entries,
2475            &acct_dir.join("ecl_journal_entries.json"),
2476            "ECL journal entries",
2477        );
2478        write_json_safe(
2479            &result.accounting_standards.provisions,
2480            &acct_dir.join("provisions.json"),
2481            "Provisions (IAS 37 / ASC 450)",
2482        );
2483        write_json_safe(
2484            &result.accounting_standards.provision_movements,
2485            &acct_dir.join("provision_movements.json"),
2486            "Provision movements",
2487        );
2488        write_json_safe(
2489            &result.accounting_standards.contingent_liabilities,
2490            &acct_dir.join("contingent_liabilities.json"),
2491            "Contingent liabilities",
2492        );
2493        write_json_safe(
2494            &result.accounting_standards.provision_journal_entries,
2495            &acct_dir.join("provision_journal_entries.json"),
2496            "Provision journal entries",
2497        );
2498
2499        // IAS 21 — write under accounting_standards/fx/
2500        if !result
2501            .accounting_standards
2502            .currency_translation_results
2503            .is_empty()
2504        {
2505            let fx_dir = acct_dir.join("fx");
2506            std::fs::create_dir_all(&fx_dir)?;
2507            write_json_safe(
2508                &result.accounting_standards.currency_translation_results,
2509                &fx_dir.join("currency_translation_results.json"),
2510                "IAS 21 currency translation results",
2511            );
2512        }
2513
2514        // v3.3.1: Leases (IFRS 16 / ASC 842)
2515        if !result.accounting_standards.leases.is_empty() {
2516            let leases_dir = acct_dir.join("leases");
2517            std::fs::create_dir_all(&leases_dir)?;
2518            write_json_safe(
2519                &result.accounting_standards.leases,
2520                &leases_dir.join("leases.json"),
2521                "Leases (IFRS 16 / ASC 842) — v3.3.1",
2522            );
2523        }
2524
2525        // v3.3.1: Fair value measurements (IFRS 13 / ASC 820)
2526        if !result
2527            .accounting_standards
2528            .fair_value_measurements
2529            .is_empty()
2530        {
2531            let fv_dir = acct_dir.join("fair_value");
2532            std::fs::create_dir_all(&fv_dir)?;
2533            write_json_safe(
2534                &result.accounting_standards.fair_value_measurements,
2535                &fv_dir.join("fair_value_measurements.json"),
2536                "Fair value measurements (IFRS 13 / ASC 820) — v3.3.1",
2537            );
2538        }
2539
2540        // v3.3.1: Framework reconciliation (dual reporting)
2541        if !result.accounting_standards.framework_differences.is_empty() {
2542            let diff_dir = acct_dir.join("framework_differences");
2543            std::fs::create_dir_all(&diff_dir)?;
2544            write_json_safe(
2545                &result.accounting_standards.framework_differences,
2546                &diff_dir.join("framework_differences.json"),
2547                "Framework differences (US GAAP vs IFRS) — v3.3.1",
2548            );
2549            write_json_safe(
2550                &result.accounting_standards.framework_reconciliations,
2551                &diff_dir.join("framework_reconciliations.json"),
2552                "Per-entity framework reconciliation — v3.3.1",
2553            );
2554        }
2555    }
2556
2557    // ========================================================================
2558    // Quality Gate Results
2559    // ========================================================================
2560    if let Some(ref gate_result) = result.gate_result {
2561        match serde_json::to_string_pretty(gate_result) {
2562            Ok(json) => {
2563                if let Err(e) = std::fs::write(output_dir.join("quality_gate_result.json"), json) {
2564                    warn!("Failed to write quality gate result: {}", e);
2565                } else {
2566                    info!(
2567                        "  Quality gate result written (passed={})",
2568                        gate_result.passed
2569                    );
2570                }
2571            }
2572            Err(e) => warn!("Failed to serialize quality gate result: {}", e),
2573        }
2574    }
2575
2576    // ========================================================================
2577    // Treasury
2578    // ========================================================================
2579    if !result.treasury.debt_instruments.is_empty()
2580        || !result.treasury.cash_positions.is_empty()
2581        || !result.treasury.hedging_instruments.is_empty()
2582    {
2583        let treasury_dir = output_dir.join("treasury");
2584        std::fs::create_dir_all(&treasury_dir)?;
2585        info!("Writing treasury data...");
2586
2587        write_json_safe(
2588            &result.treasury.debt_instruments,
2589            &treasury_dir.join("debt_instruments.json"),
2590            "Debt instruments",
2591        );
2592        write_json_safe(
2593            &result.treasury.hedging_instruments,
2594            &treasury_dir.join("hedging_instruments.json"),
2595            "Hedging instruments",
2596        );
2597        write_json_safe(
2598            &result.treasury.hedge_relationships,
2599            &treasury_dir.join("hedge_relationships.json"),
2600            "Hedge relationships",
2601        );
2602        write_json_safe(
2603            &result.treasury.cash_positions,
2604            &treasury_dir.join("cash_positions.json"),
2605            "Cash positions",
2606        );
2607        write_json_safe(
2608            &result.treasury.cash_forecasts,
2609            &treasury_dir.join("cash_forecasts.json"),
2610            "Cash forecasts",
2611        );
2612        write_json_safe(
2613            &result.treasury.cash_pools,
2614            &treasury_dir.join("cash_pools.json"),
2615            "Cash pools",
2616        );
2617        write_json_safe(
2618            &result.treasury.cash_pool_sweeps,
2619            &treasury_dir.join("cash_pool_sweeps.json"),
2620            "Cash pool sweeps",
2621        );
2622        write_json_safe(
2623            &result.treasury.bank_guarantees,
2624            &treasury_dir.join("bank_guarantees.json"),
2625            "Bank guarantees",
2626        );
2627        write_json_safe(
2628            &result.treasury.netting_runs,
2629            &treasury_dir.join("netting_runs.json"),
2630            "Netting runs",
2631        );
2632        if !result.treasury.treasury_anomaly_labels.is_empty() {
2633            write_json_safe(
2634                &result.treasury.treasury_anomaly_labels,
2635                &treasury_dir.join("treasury_anomaly_labels.json"),
2636                "Treasury anomaly labels",
2637            );
2638        }
2639    }
2640
2641    // ========================================================================
2642    // Project Accounting
2643    // ========================================================================
2644    if !result.project_accounting.projects.is_empty() {
2645        let pa_dir = output_dir.join("project_accounting");
2646        std::fs::create_dir_all(&pa_dir)?;
2647        info!("Writing project accounting data...");
2648
2649        write_json_safe(
2650            &result.project_accounting.projects,
2651            &pa_dir.join("projects.json"),
2652            "Projects",
2653        );
2654        write_json_safe(
2655            &result.project_accounting.cost_lines,
2656            &pa_dir.join("cost_lines.json"),
2657            "Project cost lines",
2658        );
2659        write_json_safe(
2660            &result.project_accounting.revenue_records,
2661            &pa_dir.join("revenue_records.json"),
2662            "Project revenue records",
2663        );
2664        write_json_safe(
2665            &result.project_accounting.earned_value_metrics,
2666            &pa_dir.join("earned_value_metrics.json"),
2667            "Earned value metrics",
2668        );
2669        write_json_safe(
2670            &result.project_accounting.change_orders,
2671            &pa_dir.join("change_orders.json"),
2672            "Change orders",
2673        );
2674        write_json_safe(
2675            &result.project_accounting.milestones,
2676            &pa_dir.join("milestones.json"),
2677            "Project milestones",
2678        );
2679    }
2680
2681    // ========================================================================
2682    // Evolution Events (Process Evolution + Organizational Events)
2683    // ========================================================================
2684    if !result.process_evolution.is_empty()
2685        || !result.organizational_events.is_empty()
2686        || !result.disruption_events.is_empty()
2687    {
2688        let events_dir = output_dir.join("events");
2689        std::fs::create_dir_all(&events_dir)?;
2690        info!("Writing evolution events...");
2691
2692        write_json_safe(
2693            &result.process_evolution,
2694            &events_dir.join("process_evolution_events.json"),
2695            "Process evolution events",
2696        );
2697        write_json_safe(
2698            &result.organizational_events,
2699            &events_dir.join("organizational_events.json"),
2700            "Organizational events",
2701        );
2702        write_json_safe(
2703            &result.disruption_events,
2704            &events_dir.join("disruption_events.json"),
2705            "Disruption events",
2706        );
2707    }
2708
2709    // ========================================================================
2710    // ML Training: Counterfactual Pairs
2711    // ========================================================================
2712    if !result.counterfactual_pairs.is_empty() {
2713        let ml_dir = output_dir.join("ml_training");
2714        std::fs::create_dir_all(&ml_dir)?;
2715        info!("Writing ML training data...");
2716
2717        write_json_safe(
2718            &result.counterfactual_pairs,
2719            &ml_dir.join("counterfactual_pairs.json"),
2720            "Counterfactual pairs",
2721        );
2722    }
2723
2724    // ========================================================================
2725    // Fraud Red-Flag Indicators
2726    // ========================================================================
2727    if !result.red_flags.is_empty() {
2728        let labels_dir = output_dir.join("labels");
2729        std::fs::create_dir_all(&labels_dir)?;
2730        info!("Writing fraud red-flag indicators...");
2731
2732        write_json_safe(
2733            &result.red_flags,
2734            &labels_dir.join("fraud_red_flags.json"),
2735            "Fraud red flags",
2736        );
2737    }
2738
2739    // ========================================================================
2740    // Collusion Rings
2741    // ========================================================================
2742    if !result.collusion_rings.is_empty() {
2743        let labels_dir = output_dir.join("labels");
2744        std::fs::create_dir_all(&labels_dir)?;
2745        info!("Writing collusion rings...");
2746
2747        write_json_safe(
2748            &result.collusion_rings,
2749            &labels_dir.join("collusion_rings.json"),
2750            "Collusion rings",
2751        );
2752    }
2753
2754    // ========================================================================
2755    // Temporal Vendor Version Chains
2756    // ========================================================================
2757    if !result.temporal_vendor_chains.is_empty() {
2758        let temporal_dir = output_dir.join("temporal");
2759        std::fs::create_dir_all(&temporal_dir)?;
2760        info!("Writing temporal vendor version chains...");
2761
2762        write_json_safe(
2763            &result.temporal_vendor_chains,
2764            &temporal_dir.join("vendor_version_chains.json"),
2765            "Vendor version chains",
2766        );
2767    }
2768
2769    // ========================================================================
2770    // Entity Relationship Graph + Cross-Process Links
2771    // ========================================================================
2772    if result.entity_relationship_graph.is_some() || !result.cross_process_links.is_empty() {
2773        let rel_dir = output_dir.join("relationships");
2774        std::fs::create_dir_all(&rel_dir)?;
2775        info!("Writing entity relationship data...");
2776
2777        if let Some(ref graph) = result.entity_relationship_graph {
2778            match serde_json::to_string_pretty(graph) {
2779                Ok(json) => {
2780                    let path = rel_dir.join("entity_relationship_graph.json");
2781                    if let Err(e) = std::fs::write(&path, json) {
2782                        warn!("Failed to write entity relationship graph: {}", e);
2783                    } else {
2784                        info!(
2785                            "  Entity relationship graph written: {} nodes, {} edges -> {}",
2786                            graph.nodes.len(),
2787                            graph.edges.len(),
2788                            path.display()
2789                        );
2790                    }
2791                }
2792                Err(e) => warn!("Failed to serialize entity relationship graph: {}", e),
2793            }
2794        }
2795
2796        write_json_safe(
2797            &result.cross_process_links,
2798            &rel_dir.join("cross_process_links.json"),
2799            "Cross-process links",
2800        );
2801    }
2802
2803    // ========================================================================
2804    // Industry-Specific Data
2805    // ========================================================================
2806    if let Some(ref industry_output) = result.industry_output {
2807        if !industry_output.gl_accounts.is_empty() {
2808            let industry_dir = output_dir.join("industry");
2809            std::fs::create_dir_all(&industry_dir).ok();
2810            info!("Writing industry-specific data...");
2811            match serde_json::to_string_pretty(industry_output) {
2812                Ok(json) => {
2813                    if let Err(e) = std::fs::write(industry_dir.join("industry_data.json"), json) {
2814                        warn!("Failed to write industry data: {}", e);
2815                    } else {
2816                        info!(
2817                            "  Industry data written: {} GL accounts for {}",
2818                            industry_output.gl_accounts.len(),
2819                            industry_output.industry
2820                        );
2821                    }
2822                }
2823                Err(e) => warn!("Failed to serialize industry data: {}", e),
2824            }
2825        }
2826    }
2827
2828    // ========================================================================
2829    // Graph Export Summary
2830    // ========================================================================
2831    if result.graph_export.exported {
2832        let graph_dir = output_dir.join("graph_export");
2833        std::fs::create_dir_all(&graph_dir).ok();
2834        match serde_json::to_string_pretty(&result.graph_export) {
2835            Ok(json) => {
2836                if let Err(e) = std::fs::write(graph_dir.join("graph_export_summary.json"), json) {
2837                    warn!("Failed to write graph export summary: {}", e);
2838                } else {
2839                    info!("  Graph export summary written");
2840                }
2841            }
2842            Err(e) => warn!("Failed to serialize graph export summary: {}", e),
2843        }
2844    }
2845
2846    // ========================================================================
2847    // Compliance Regulations
2848    // ========================================================================
2849    let cr = &result.compliance_regulations;
2850    let has_compliance_data = !cr.standard_records.is_empty()
2851        || !cr.audit_procedures.is_empty()
2852        || !cr.findings.is_empty()
2853        || !cr.filings.is_empty();
2854    if has_compliance_data {
2855        let cr_dir = output_dir.join("compliance_regulations");
2856        std::fs::create_dir_all(&cr_dir)?;
2857        info!("Writing compliance regulations data...");
2858
2859        write_json_safe(
2860            &cr.standard_records,
2861            &cr_dir.join("compliance_standards.json"),
2862            "Compliance standards",
2863        );
2864        write_json_safe(
2865            &cr.cross_reference_records,
2866            &cr_dir.join("cross_references.json"),
2867            "Cross-references",
2868        );
2869        write_json_safe(
2870            &cr.jurisdiction_records,
2871            &cr_dir.join("jurisdiction_profiles.json"),
2872            "Jurisdiction profiles",
2873        );
2874        write_json_safe(
2875            &cr.audit_procedures,
2876            &cr_dir.join("audit_procedures.json"),
2877            "Audit procedures",
2878        );
2879        write_json_safe(
2880            &cr.findings,
2881            &cr_dir.join("compliance_findings.json"),
2882            "Compliance findings",
2883        );
2884        write_json_safe(
2885            &cr.filings,
2886            &cr_dir.join("regulatory_filings.json"),
2887            "Regulatory filings",
2888        );
2889
2890        if let Some(ref graph) = cr.compliance_graph {
2891            match serde_json::to_string_pretty(graph) {
2892                Ok(json) => {
2893                    if let Err(e) = std::fs::write(cr_dir.join("compliance_graph.json"), json) {
2894                        warn!("Failed to write compliance graph: {}", e);
2895                    } else {
2896                        info!(
2897                            "  Compliance graph written: {} nodes, {} edges",
2898                            graph.nodes.len(),
2899                            graph.edges.len()
2900                        );
2901                    }
2902                }
2903                Err(e) => warn!("Failed to serialize compliance graph: {}", e),
2904            }
2905        }
2906    }
2907
2908    // ========================================================================
2909    // Generation Statistics
2910    // ========================================================================
2911    match serde_json::to_string_pretty(&result.statistics) {
2912        Ok(json) => {
2913            if let Err(e) = std::fs::write(output_dir.join("generation_statistics.json"), json) {
2914                warn!("Failed to write generation statistics: {}", e);
2915            } else {
2916                info!("  Generation statistics written");
2917            }
2918        }
2919        Err(e) => warn!("Failed to serialize generation statistics: {}", e),
2920    }
2921
2922    info!("Output writing complete.");
2923    Ok(())
2924}
2925
2926/// Write JSON with error handling - logs a warning on failure but does not abort.
2927///
2928/// When the `FLAT_LAYOUT_ACTIVE` thread-local is true (set by
2929/// `write_all_output_with_layout` when `export_layout: flat`), this routes
2930/// through `write_json_flat` so nested `{header, lines|items|allocations}`
2931/// shapes are automatically flattened. For structures without that shape,
2932/// `write_json_flat` passes through unchanged.
2933fn write_json_safe<T: serde::Serialize>(data: &[T], path: &Path, label: &str) {
2934    // Skip JSON entirely when not in requested output formats
2935    if SKIP_JSON.with(|c| c.get()) {
2936        return;
2937    }
2938    if FLAT_LAYOUT_ACTIVE.with(|c| c.get()) {
2939        write_json_flat(data, path, label);
2940    } else if let Err(e) = write_json(data, path, label) {
2941        warn!("Failed to write {}: {}", label, e);
2942    }
2943}
2944
2945/// Write JSON, choosing flat or nested layout based on the flag.
2946fn write_json_auto<T: serde::Serialize>(data: &[T], path: &Path, label: &str, flat: bool) {
2947    if flat {
2948        write_json_flat(data, path, label);
2949    } else {
2950        write_json_safe(data, path, label);
2951    }
2952}
2953
2954/// Write a JSON file ALWAYS, even when the slice is empty (writes `[]`).
2955///
2956/// Use for files that must exist in the archive for SDK consumers
2957/// (e.g., `audit_opinions.json`) regardless of whether the phase that
2958/// populates them ran. `write_json_safe` / `write_json` short-circuit
2959/// on empty slices, which would break manifest-driven clients that
2960/// expect the file to be present.
2961fn write_json_always<T: serde::Serialize>(data: &[T], path: &Path, label: &str) {
2962    if SKIP_JSON.with(|c| c.get()) {
2963        return;
2964    }
2965    match std::fs::File::create(path) {
2966        Ok(file) => {
2967            let mut writer = std::io::BufWriter::with_capacity(64 * 1024, file);
2968            if let Err(e) = (|| -> Result<(), Box<dyn std::error::Error>> {
2969                writer.write_all(b"[\n")?;
2970                for (i, item) in data.iter().enumerate() {
2971                    if i > 0 {
2972                        writer.write_all(b",\n")?;
2973                    }
2974                    serde_json::to_writer_pretty(&mut writer, item)?;
2975                }
2976                if !data.is_empty() {
2977                    writer.write_all(b"\n")?;
2978                }
2979                writer.write_all(b"]\n")?;
2980                writer.flush()?;
2981                Ok(())
2982            })() {
2983                warn!("Failed to write {}: {}", label, e);
2984            } else {
2985                info!(
2986                    "  {} written: {} records -> {}",
2987                    label,
2988                    data.len(),
2989                    path.display()
2990                );
2991            }
2992        }
2993        Err(e) => {
2994            warn!("Failed to create {}: {}", path.display(), e);
2995        }
2996    }
2997}
2998
2999/// Write a flat JSON file by expanding a primary items array and merging the
3000/// surrounding context onto each line.
3001///
3002/// Flattens any record that contains a recognised items array
3003/// (`items`, `lines`, `line_items`, or `allocations`) into one row per line,
3004/// carrying over both the optional `header` sub-object and all other
3005/// top-level fields. Records without a recognised items array are emitted
3006/// as-is, except that an optional nested `header` sub-object is unwrapped
3007/// onto the top level so consumers see a uniformly flat shape.
3008///
3009/// Flow-style documents (`{header, items}`) and subledger-style documents
3010/// (`{..top-level scalars.., lines}`, e.g. AP/AR invoices, inventory
3011/// valuation runs) are both handled — fixing the SDK-team-reported gap
3012/// where subledger invoices were left with `lines` nested in flat mode.
3013///
3014/// Uses heap-allocated intermediates to avoid stack overflow with large
3015/// records in constrained environments (e.g., distroless containers with
3016/// glibc 2.36). Fixes #116.
3017fn write_json_flat<T: serde::Serialize>(data: &[T], path: &Path, label: &str) {
3018    if data.is_empty() {
3019        return;
3020    }
3021
3022    // Pre-allocate on heap — avoid flat_map closure accumulating on the stack
3023    let mut flat: Vec<serde_json::Value> = Vec::with_capacity(data.len());
3024
3025    for item in data {
3026        let val = match serde_json::to_value(item) {
3027            Ok(v) => v,
3028            Err(e) => {
3029                warn!("Failed to serialize record for flat export: {}", e);
3030                continue;
3031            }
3032        };
3033
3034        let serde_json::Value::Object(map) = val else {
3035            flat.push(val);
3036            continue;
3037        };
3038
3039        // Find the primary items array key (first match wins).
3040        let items_key = ["items", "lines", "allocations", "line_items"]
3041            .iter()
3042            .find(|k| map.contains_key(**k))
3043            .copied();
3044
3045        // Optional nested header sub-object (used by document flows).
3046        let header_map = match map.get("header") {
3047            Some(serde_json::Value::Object(h)) => Some(h),
3048            _ => None,
3049        };
3050
3051        let Some(items_key) = items_key else {
3052            // No items array. Emit one row, unwrapping the optional header
3053            // sub-object so consumers see a flat shape regardless of model
3054            // layout (e.g. Payments have `header` but no items/allocations
3055            // when allocations are empty).
3056            if let Some(header_map) = header_map {
3057                let mut merged = map.clone();
3058                merged.remove("header");
3059                for (k, v) in header_map {
3060                    merged.entry(k.clone()).or_insert_with(|| v.clone());
3061                }
3062                flat.push(serde_json::Value::Object(merged));
3063            } else {
3064                flat.push(serde_json::Value::Object(map));
3065            }
3066            continue;
3067        };
3068
3069        let Some(serde_json::Value::Array(items)) = map.get(items_key) else {
3070            // `items_key` present but not an array — passthrough.
3071            flat.push(serde_json::Value::Object(map));
3072            continue;
3073        };
3074
3075        // Empty items array: emit one row with the (unwrapped) header
3076        // context so downstream consumers can still find the parent
3077        // record — prevents silently dropping empty-lines invoices.
3078        if items.is_empty() {
3079            let mut merged = map.clone();
3080            merged.remove(items_key);
3081            if let Some(header_map) = header_map {
3082                merged.remove("header");
3083                for (k, v) in header_map {
3084                    merged.entry(k.clone()).or_insert_with(|| v.clone());
3085                }
3086            }
3087            flat.push(serde_json::Value::Object(merged));
3088            continue;
3089        }
3090
3091        // Collect all other top-level fields (scalars, objects, arrays)
3092        // so they carry over onto every flattened line — matching pandas
3093        // `explode()` semantics. This is the behaviour SDK consumers
3094        // expect: header context is repeated per line, nested objects
3095        // like `net_amount: {amount, currency}` come along for the ride.
3096        let top_fields: Vec<(&String, &serde_json::Value)> = map
3097            .iter()
3098            .filter(|(k, _)| k.as_str() != "header" && k.as_str() != items_key)
3099            .collect();
3100
3101        flat.reserve(items.len());
3102        for item_val in items {
3103            let mut merged = serde_json::Map::new();
3104            // Line/item fields first (take precedence on collisions).
3105            if let serde_json::Value::Object(m) = item_val {
3106                merged.extend(m.iter().map(|(k, v)| (k.clone(), v.clone())));
3107            }
3108            // Header sub-object (when present) — don't overwrite line fields.
3109            if let Some(header_map) = header_map {
3110                for (k, v) in header_map {
3111                    merged.entry(k.clone()).or_insert_with(|| v.clone());
3112                }
3113            }
3114            // All other top-level fields.
3115            for &(k, v) in &top_fields {
3116                merged.entry(k.clone()).or_insert_with(|| v.clone());
3117            }
3118            flat.push(serde_json::Value::Object(merged));
3119        }
3120    }
3121
3122    if flat.is_empty() {
3123        return;
3124    }
3125
3126    // Stream-write each flattened record instead of serializing the whole Vec
3127    let count = flat.len();
3128    match std::fs::File::create(path) {
3129        Ok(file) => {
3130            use std::io::Write;
3131            let mut writer = std::io::BufWriter::with_capacity(512 * 1024, file);
3132            if let Err(e) = (|| -> Result<(), Box<dyn std::error::Error>> {
3133                writer.write_all(b"[\n")?;
3134                for (i, item) in flat.iter().enumerate() {
3135                    if i > 0 {
3136                        writer.write_all(b",\n")?;
3137                    }
3138                    serde_json::to_writer_pretty(&mut writer, item)?;
3139                }
3140                writer.write_all(b"\n]\n")?;
3141                writer.flush()?;
3142                Ok(())
3143            })() {
3144                warn!("Failed to write {}: {}", label, e);
3145            } else {
3146                info!(
3147                    "  {} written (flat): {} records -> {}",
3148                    label,
3149                    count,
3150                    path.display()
3151                );
3152            }
3153        }
3154        Err(e) => warn!("Failed to create {}: {}", label, e),
3155    }
3156}
3157
3158/// Write a single serializable value as a JSON file.
3159fn write_json_single<T: serde::Serialize>(
3160    data: &T,
3161    path: &Path,
3162    label: &str,
3163) -> Result<(), Box<dyn std::error::Error>> {
3164    let file = std::fs::File::create(path)?;
3165    let writer = std::io::BufWriter::with_capacity(256 * 1024, file);
3166    serde_json::to_writer_pretty(writer, data)?;
3167    info!("  {} written -> {}", label, path.display());
3168    Ok(())
3169}
3170
3171/// Write a single serializable value as a JSON file, logging a warning on failure.
3172fn write_json_single_safe<T: serde::Serialize>(data: &T, path: &Path, label: &str) {
3173    if SKIP_JSON.with(|c| c.get()) {
3174        return;
3175    }
3176    if let Err(e) = write_json_single(data, path, label) {
3177        warn!("Failed to write {}: {}", label, e);
3178    }
3179}
3180
/// Serializable summary of balance validation (avoids serializing the full
/// `BalanceValidationResult` which has non-Serialize validation error types).
///
/// Built by `BalanceValidationSummary::from`, which copies each field from
/// the same-named field of `BalanceValidationResult` unless noted otherwise.
#[derive(serde::Serialize)]
struct BalanceValidationSummary {
    /// Copied from `BalanceValidationResult::validated`.
    validated: bool,
    /// Copied from `BalanceValidationResult::is_balanced`.
    is_balanced: bool,
    /// Copied from `BalanceValidationResult::entries_processed`.
    entries_processed: u64,
    /// `BalanceValidationResult::total_debits` rendered with `to_string()`.
    total_debits: String,
    /// `BalanceValidationResult::total_credits` rendered with `to_string()`.
    total_credits: String,
    /// Copied from `BalanceValidationResult::accounts_tracked`.
    accounts_tracked: usize,
    /// Copied from `BalanceValidationResult::companies_tracked`.
    companies_tracked: usize,
    /// Copied from `BalanceValidationResult::has_unbalanced_entries`.
    has_unbalanced_entries: bool,
    /// Length of `BalanceValidationResult::validation_errors`; the errors
    /// themselves are not carried into the summary.
    validation_error_count: usize,
}
3195
3196impl BalanceValidationSummary {
3197    fn from(v: &crate::enhanced_orchestrator::BalanceValidationResult) -> Self {
3198        Self {
3199            validated: v.validated,
3200            is_balanced: v.is_balanced,
3201            entries_processed: v.entries_processed,
3202            total_debits: v.total_debits.to_string(),
3203            total_credits: v.total_credits.to_string(),
3204            accounts_tracked: v.accounts_tracked,
3205            companies_tracked: v.companies_tracked,
3206            has_unbalanced_entries: v.has_unbalanced_entries,
3207            validation_error_count: v.validation_errors.len(),
3208        }
3209    }
3210}