Skip to main content

datasynth_group/manifest/
builder.rs

1//! GroupManifest assembly — spec §4.1, Task 2.9.
2//!
3//! [`build_manifest`] is the top-level orchestrator.  It calls every prior
4//! Task 2.1–2.8 builder in dependency order and assembles the results into a
5//! single [`GroupManifest`] that drives shard and aggregate phases.
6
7use std::collections::BTreeMap;
8
9use chrono::NaiveDate;
10use serde::{Deserialize, Serialize};
11
12use crate::config::{GroupConfig, OutputLayoutConfig, PeriodLength};
13use crate::errors::{GroupError, GroupResult};
14use crate::manifest::audit_plan::{build_audit_engagement_plan, AuditEngagementPlan};
15use crate::manifest::cgu_plan::{build_cgu_plan, CguPlan};
16use crate::manifest::coa_master::{build_coa_master, ChartOfAccountsMaster};
17use crate::manifest::expansion::{expand_ownership, ExpandedEntity};
18use crate::manifest::fx_master::{build_fx_master, FxRateMaster};
19use crate::manifest::ic_expansion::{expand_ic_relationships, ResolvedIcRelationship};
20use crate::manifest::seeds::{derive_aggregate_seed, derive_entity_seed, derive_manifest_seed};
21use crate::manifest::shard_plan::{build_shard_plan, ShardPlan};
22use crate::manifest::tax_plan::{build_tax_group_plan, TaxGroupPlan};
23
24// ── Schema version ────────────────────────────────────────────────────────────
25
/// Schema version for the [`GroupManifest`] JSON artifact.
///
/// Bump the major component when the shape changes; aggregate-phase readers
/// must refuse to consume a manifest whose `schema_version` major component
/// differs from theirs.
///
/// [`build_manifest`] copies this value into [`GroupManifest::schema_version`]
/// for every manifest it produces.
pub const MANIFEST_SCHEMA_VERSION: &str = "1.0";
32
33// ── Public types ──────────────────────────────────────────────────────────────
34
/// The full manifest JSON artifact that drives shard and aggregate phases.
///
/// Produced once per engagement period by [`build_manifest`].  All fields
/// are deterministic given identical [`GroupConfig`] inputs — byte-identical
/// serialized JSON is guaranteed for identical configs.
///
/// Note: `PartialEq` is not derived because [`ChartOfAccountsMaster`] wraps
/// [`datasynth_core::models::ChartOfAccounts`] which does not implement it.
/// Use serialized JSON comparison for equality/determinism tests.
///
/// Serde's derive emits struct fields in declaration order, so reordering
/// the fields below changes the serialized artifact even though the data is
/// the same.  Fields added after v5.0 carry `#[serde(default)]` so manifests
/// written before the field existed still deserialise.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GroupManifest {
    /// Format version — currently `"1.0"` (see [`MANIFEST_SCHEMA_VERSION`]).
    pub schema_version: String,
    /// Group identifier from [`GroupConfig::id`].
    pub group_id: String,
    /// Raw seed from [`GroupConfig::seed`].
    pub group_seed: u64,
    /// ISO 4217 presentation currency.
    pub presentation_currency: String,
    /// Resolved engagement period.
    pub period: ManifestPeriod,
    /// Hex-encoded blake3 digest of the manifest-phase seed (spec §2.4).
    pub manifest_seed: String,
    /// Hex-encoded blake3 digest of the aggregate-phase seed (spec §2.4).
    pub aggregate_seed: String,
    /// Ownership graph: parent + all expanded entities with per-entity seeds.
    pub ownership_graph: OwnershipGraphSection,
    /// Scoping profiles map (raw YAML values forwarded verbatim from config).
    pub scoping_profiles: BTreeMap<String, serde_yaml::Value>,
    /// Chart of accounts master — one CoA per distinct accounting framework.
    pub chart_of_accounts_master: ChartOfAccountsMaster,
    /// FX rate master — closing/average rates for IAS 21 translation.
    pub fx_rate_master: FxRateMaster,
    /// Flat list of all resolved intercompany relationships.
    pub ic_relationships: Vec<ResolvedIcRelationship>,
    /// ISA 600-level audit engagement plan (materiality, scope, component auditors).
    pub audit_engagement_plan: AuditEngagementPlan,
    /// Pillar Two / CbC / transfer-pricing group tax plan.
    pub tax_group_plan: TaxGroupPlan,
    /// **v5.2** — IAS 36 § 10 cash-generating-unit plan: CGU
    /// definitions + acquisition-date goodwill allocations the
    /// aggregate phase tests for impairment.  Empty plan when the
    /// engagement supplies no CGU configuration; v5.0–v5.2 archives
    /// without this field deserialise to the `Default` (empty) plan
    /// — backwards-compatible byte-for-byte.
    ///
    /// NOTE(review): the field is tagged as introduced in v5.2, yet the
    /// sentence above lists v5.2 archives among those lacking it — confirm
    /// whether the intended range is "v5.0–v5.1".
    #[serde(default)]
    pub cgu_plan: CguPlan,
    /// Shard assignment plan — entities batched into ~1 TB shards.
    pub shard_plan: ShardPlan,
    /// Output layout config forwarded verbatim from [`GroupConfig::output`].
    pub output: OutputLayoutConfig,
    /// **v5.3** — IC matching strategy + tolerance.  Forwarded
    /// verbatim from `GroupConfig::intercompany::matching`.  v5.2
    /// archives that don't carry this field deserialise to the
    /// `Default` (`ManifestDriven`, `tolerance = 0`) — exact-match
    /// behaviour preserved byte-for-byte.
    #[serde(default)]
    pub matching: crate::config::IcMatchingConfig,
    /// **v5.31** — raw `defaults:` YAML block from [`GroupConfig`],
    /// forwarded verbatim so per-entity orchestrator configs can read
    /// generator-level settings (e.g. `fraud:`, `anomaly_injection:`,
    /// `distributions:`) that the manifest doesn't otherwise model.
    /// Pre-v5.31 manifests deserialise to `Value::Null` via
    /// `#[serde(default)]` — backwards-compat preserved.
    #[serde(default)]
    pub defaults: serde_yaml::Value,
}
102
/// Resolved engagement period.
///
/// Built by [`compute_period_pub`]: `start` and `length` are taken from the
/// period config and `end` is derived from them (start + N months − 1 day).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ManifestPeriod {
    /// First day of the period (inclusive).
    pub start: NaiveDate,
    /// Last day of the period (inclusive).
    pub end: NaiveDate,
    /// Period length from config.
    pub length: PeriodLength,
}
113
/// Ownership graph section of the manifest.
///
/// Assembled by [`build_manifest`] from the ownership config and the result
/// of ownership expansion.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct OwnershipGraphSection {
    /// Top-level parent entity code (copied from the ownership config's
    /// `parent_entity_code`).
    pub parent_entity_code: String,
    /// All entities (explicit + generated), enriched with `entity_seed` and `shard_id`.
    /// Kept in the order the ownership expansion returned them.
    pub entities: Vec<ManifestEntity>,
}
122
/// An entity record in the manifest.
///
/// Combines the expanded entity data with the per-entity seed (hex-encoded)
/// and shard assignment for direct shard consumption.
///
/// Every field except `entity_seed` and `shard_id` is copied unchanged from
/// the corresponding `ExpandedEntity` by [`build_manifest`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ManifestEntity {
    /// Entity code.  Also the key used to look up `shard_id` in the shard
    /// plan and the input (with the group seed) to the entity-seed derivation.
    pub code: String,
    /// Optional entity name.
    pub name: Option<String>,
    /// Country of the entity (encoding not visible here — TODO confirm
    /// whether this is an ISO 3166 code).
    pub country: String,
    /// Functional currency of the entity.
    pub functional_currency: String,
    /// Scoping profile assigned to the entity — presumably a key into
    /// [`GroupManifest::scoping_profiles`]; verify against the shard plan.
    pub scoping_profile: String,
    /// Consolidation method applied to the entity.
    pub consolidation_method: crate::config::ConsolidationMethod,
    /// Ownership percentage, when one is set.  NOTE(review): presumably a
    /// fraction in `[0, 1]` like the ownership-change percents — confirm.
    pub ownership_percent: Option<rust_decimal::Decimal>,
    /// Code of this entity's parent, when one is set.
    pub parent_code: Option<String>,
    /// Accounting framework of the entity, when one is set.
    pub accounting_framework: Option<String>,
    /// Industry of the entity, when one is set.
    pub industry: Option<String>,
    /// **v5.2** — IAS 29 hyperinflationary status of this entity's
    /// functional currency.  `#[serde(default)]` so v5.0 / v5.1
    /// archives without the field deserialise to
    /// `NotHyperinflationary` (the default), preserving backwards
    /// compatibility byte-for-byte.
    #[serde(default)]
    pub hyperinflation_status: datasynth_core::models::HyperinflationStatus,
    /// **v5.2** — IFRS 3 § 41-42 / IFRS 10 § 23 / IFRS 10.B97 mid-period
    /// ownership-change events affecting this entity.  Validated by
    /// the manifest builder: each event's `effective_date` must lie
    /// within `[period.start, period.end]` (inclusive), and its
    /// `ownership_percent_before` / `ownership_percent_after` must each
    /// lie within `[0, 1]`.  Empty for
    /// engagements without ownership changes.  `#[serde(default)]`
    /// keeps v5.0–v5.1 archives loading byte-identically.
    #[serde(default)]
    pub ownership_changes: Vec<datasynth_core::models::intercompany::OwnershipChangeEvent>,
    /// Hex-encoded blake3 digest of the per-entity seed (spec §2.4).
    pub entity_seed: String,
    /// Shard identifier assigned by the shard plan (e.g. `"S_SIG_0001"`).
    /// The builder writes an empty string when the shard plan has no entry
    /// for this entity's code.
    pub shard_id: String,
}
159
160// ── Top-level builder ─────────────────────────────────────────────────────────
161
162/// Build the complete [`GroupManifest`] from a [`GroupConfig`].
163///
164/// This is the single entry point for the manifest phase.  It orchestrates all
165/// Task 2.1–2.8 builders in dependency order:
166///
167/// 1. Period computation (start → end).
168/// 2. Manifest-phase and aggregate-phase seed derivation.
169/// 3. Ownership expansion (explicit + generated entities).
170/// 4. Shard plan — must precede per-entity `shard_id` stamping.
171/// 5. Chart of accounts master.
172/// 6. FX rate master.
173/// 7. Intercompany relationship expansion.
174/// 8. Audit engagement plan.
175/// 9. Tax group plan.
176/// 10. ManifestEntity assembly (entity_seed + shard_id per entity).
177///
178/// # Errors
179/// Propagates all errors from sub-builders.  The most common failure modes are:
180/// - [`GroupError::Config`] for invalid period dates, missing materiality config,
181///   or bad FX rate coverage.
182/// - [`GroupError::Manifest`] for CoA load failures.
183pub fn build_manifest(cfg: &GroupConfig) -> GroupResult<GroupManifest> {
184    // ── 1. Resolve the engagement period ────────────────────────────────────
185    let period = compute_period(&cfg.period)?;
186
187    // ── 2. Derive manifest and aggregate seeds ───────────────────────────────
188    let manifest_seed = derive_manifest_seed(cfg.seed, period.start);
189    let aggregate_seed = derive_aggregate_seed(cfg.seed, period.start);
190
191    // ── 3. Expand ownership ──────────────────────────────────────────────────
192    let expanded: Vec<ExpandedEntity> = expand_ownership(&cfg.ownership, cfg.seed, period.start)?;
193
194    // ── 4. Shard plan (must precede per-entity shard_id assignment) ──────────
195    let shard_plan = build_shard_plan(&expanded, &cfg.scoping_profiles)?;
196    let shard_by_code = shard_plan.shard_by_code();
197
198    // ── 5. Chart of accounts master ──────────────────────────────────────────
199    let coa_master = build_coa_master(&expanded, &cfg.defaults, &cfg.id)?;
200
201    // ── 6. FX rate master ────────────────────────────────────────────────────
202    let fx_master = build_fx_master(
203        &cfg.fx,
204        &cfg.presentation_currency,
205        period.start,
206        period.end,
207        &expanded,
208    )?;
209
210    // ── 7. Intercompany relationship expansion ───────────────────────────────
211    let ic_relationships = expand_ic_relationships(&cfg.intercompany, &expanded, cfg.seed)?;
212
213    // ── 8. Audit engagement plan ─────────────────────────────────────────────
214    let audit_engagement_plan =
215        build_audit_engagement_plan(&cfg.audit, &expanded, &cfg.id, &aggregate_seed)?;
216
217    // ── 9. Tax group plan ────────────────────────────────────────────────────
218    let tax_group_plan = build_tax_group_plan(&cfg.tax, &expanded)?;
219
220    // ── 9b. CGU plan (IAS 36 § 10) — empty when not configured ───────────────
221    let cgu_plan = build_cgu_plan(&cfg.cgu, &expanded)?;
222
223    // ── 10. ManifestEntity: enrich each ExpandedEntity ────────────────────────
224    // Also validates that every ownership-change event's effective_date
225    // falls within [period.start, period.end] (inclusive) — events
226    // outside the engagement period are rejected as a config error.
227    let entities: Vec<ManifestEntity> = expanded
228        .iter()
229        .map(|e| {
230            for ev in &e.ownership_changes {
231                if ev.effective_date < period.start || ev.effective_date > period.end {
232                    return Err(GroupError::Config(format!(
233                        "entity {}: ownership_change effective_date {} is outside the \
234                         engagement period [{}, {}]",
235                        e.code, ev.effective_date, period.start, period.end,
236                    )));
237                }
238                if ev.ownership_percent_before < rust_decimal::Decimal::ZERO
239                    || ev.ownership_percent_before > rust_decimal::Decimal::ONE
240                {
241                    return Err(GroupError::Config(format!(
242                        "entity {}: ownership_percent_before {} is not in [0, 1]",
243                        e.code, ev.ownership_percent_before,
244                    )));
245                }
246                if ev.ownership_percent_after < rust_decimal::Decimal::ZERO
247                    || ev.ownership_percent_after > rust_decimal::Decimal::ONE
248                {
249                    return Err(GroupError::Config(format!(
250                        "entity {}: ownership_percent_after {} is not in [0, 1]",
251                        e.code, ev.ownership_percent_after,
252                    )));
253                }
254            }
255            Ok(ManifestEntity {
256                code: e.code.clone(),
257                name: e.name.clone(),
258                country: e.country.clone(),
259                functional_currency: e.functional_currency.clone(),
260                scoping_profile: e.scoping_profile.clone(),
261                consolidation_method: e.consolidation_method,
262                ownership_percent: e.ownership_percent,
263                parent_code: e.parent_code.clone(),
264                accounting_framework: e.accounting_framework.clone(),
265                industry: e.industry.clone(),
266                hyperinflation_status: e.hyperinflation_status,
267                ownership_changes: e.ownership_changes.clone(),
268                entity_seed: hex::encode(derive_entity_seed(cfg.seed, &e.code)),
269                shard_id: shard_by_code
270                    .get(e.code.as_str())
271                    .cloned()
272                    .unwrap_or_default(),
273            })
274        })
275        .collect::<GroupResult<Vec<_>>>()?;
276
277    Ok(GroupManifest {
278        schema_version: MANIFEST_SCHEMA_VERSION.to_string(),
279        group_id: cfg.id.clone(),
280        group_seed: cfg.seed,
281        presentation_currency: cfg.presentation_currency.clone(),
282        period,
283        manifest_seed: hex::encode(manifest_seed),
284        aggregate_seed: hex::encode(aggregate_seed),
285        ownership_graph: OwnershipGraphSection {
286            parent_entity_code: cfg.ownership.parent_entity_code.clone(),
287            entities,
288        },
289        scoping_profiles: cfg.scoping_profiles.clone(),
290        chart_of_accounts_master: coa_master,
291        fx_rate_master: fx_master,
292        ic_relationships,
293        audit_engagement_plan,
294        tax_group_plan,
295        cgu_plan,
296        shard_plan,
297        output: cfg.output.clone(),
298        // v5.3: IC matching strategy + fuzzy tolerance.  Defaults
299        // produced by `IcMatchingConfig::default()` match the v5.0
300        // exact-match behaviour byte-for-byte.
301        matching: cfg.intercompany.matching.clone(),
302        // v5.31: raw `defaults:` YAML block forwarded so per-entity
303        // configs can read generator-level settings like `fraud:`.
304        defaults: cfg.defaults.clone(),
305    })
306}
307
308// ── Private helpers ───────────────────────────────────────────────────────────
309
/// Compute the [`ManifestPeriod`] from a [`crate::config::PeriodConfig`].
///
/// Private entry point used by [`build_manifest`]; it simply delegates to
/// [`compute_period_pub`], which holds the actual logic and is the variant
/// exposed to integration tests.
///
/// The period `end` date is the last calendar day *within* the period:
/// - Monthly  → start + 1 month − 1 day
/// - Quarterly → start + 3 months − 1 day
/// - SemiAnnual → start + 6 months − 1 day
/// - Annual  → start + 12 months − 1 day
///
/// # Errors
/// Returns whatever [`compute_period_pub`] returns — a [`GroupError::Config`]
/// when the end date cannot be computed from `start_date`.
fn compute_period(cfg: &crate::config::PeriodConfig) -> GroupResult<ManifestPeriod> {
    compute_period_pub(cfg)
}
320
321/// Public re-export of period computation for integration testing.
322///
323/// Exposed so `tests/manifest_builder.rs` can exercise period-end edge cases
324/// (leap-year February, quarter boundaries, etc.) without going through the
325/// full `build_manifest` stack.
326pub fn compute_period_pub(cfg: &crate::config::PeriodConfig) -> GroupResult<ManifestPeriod> {
327    let start = cfg.start_date;
328    let end = match cfg.length {
329        PeriodLength::Monthly => start.checked_add_months(chrono::Months::new(1)),
330        PeriodLength::Quarterly => start.checked_add_months(chrono::Months::new(3)),
331        PeriodLength::SemiAnnual => start.checked_add_months(chrono::Months::new(6)),
332        PeriodLength::Annual => start.checked_add_months(chrono::Months::new(12)),
333    }
334    .and_then(|d| d.pred_opt())
335    .ok_or_else(|| {
336        GroupError::Config(format!(
337            "invalid period start_date '{}' — cannot compute period end",
338            start
339        ))
340    })?;
341
342    Ok(ManifestPeriod {
343        start,
344        end,
345        length: cfg.length,
346    })
347}