datasynth_group/manifest/builder.rs
1//! GroupManifest assembly — spec §4.1, Task 2.9.
2//!
3//! [`build_manifest`] is the top-level orchestrator. It calls every prior
4//! Task 2.1–2.8 builder in dependency order and assembles the results into a
5//! single [`GroupManifest`] that drives shard and aggregate phases.
6
7use std::collections::BTreeMap;
8
9use chrono::NaiveDate;
10use serde::{Deserialize, Serialize};
11
12use crate::config::{GroupConfig, OutputLayoutConfig, PeriodLength};
13use crate::errors::{GroupError, GroupResult};
14use crate::manifest::audit_plan::{build_audit_engagement_plan, AuditEngagementPlan};
15use crate::manifest::cgu_plan::{build_cgu_plan, CguPlan};
16use crate::manifest::coa_master::{build_coa_master, ChartOfAccountsMaster};
17use crate::manifest::expansion::{expand_ownership, ExpandedEntity};
18use crate::manifest::fx_master::{build_fx_master, FxRateMaster};
19use crate::manifest::ic_expansion::{expand_ic_relationships, ResolvedIcRelationship};
20use crate::manifest::seeds::{derive_aggregate_seed, derive_entity_seed, derive_manifest_seed};
21use crate::manifest::shard_plan::{build_shard_plan, ShardPlan};
22use crate::manifest::tax_plan::{build_tax_group_plan, TaxGroupPlan};
23
24// ── Schema version ────────────────────────────────────────────────────────────
25
/// Schema version for the [`GroupManifest`] JSON artifact.
///
/// [`build_manifest`] stamps this value into
/// [`GroupManifest::schema_version`] on every manifest it produces.
///
/// Bump the major component when the shape changes; aggregate-phase readers
/// must refuse to consume a manifest whose `schema_version` major component
/// differs from theirs.
pub const MANIFEST_SCHEMA_VERSION: &str = "1.0";
32
33// ── Public types ──────────────────────────────────────────────────────────────
34
/// The full manifest JSON artifact that drives shard and aggregate phases.
///
/// Produced once per engagement period by [`build_manifest`]. All fields
/// are deterministic given identical [`GroupConfig`] inputs — byte-identical
/// serialized JSON is guaranteed for identical configs.
///
/// Field declaration order is part of the serialized shape: reordering
/// fields changes the emitted JSON and therefore breaks byte-level
/// determinism comparisons against previously written manifests.
///
/// Note: `PartialEq` is not derived because [`ChartOfAccountsMaster`] wraps
/// [`datasynth_core::models::ChartOfAccounts`] which does not implement it.
/// Use serialized JSON comparison for equality/determinism tests.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GroupManifest {
    /// Format version — currently `"1.0"`. Set from
    /// [`MANIFEST_SCHEMA_VERSION`] by [`build_manifest`].
    pub schema_version: String,
    /// Group identifier from [`GroupConfig::id`].
    pub group_id: String,
    /// Raw seed from [`GroupConfig::seed`].
    pub group_seed: u64,
    /// ISO 4217 presentation currency.
    pub presentation_currency: String,
    /// Resolved engagement period.
    pub period: ManifestPeriod,
    /// Hex-encoded blake3 digest of the manifest-phase seed (spec §2.4).
    /// Derived from the group seed and the period start date.
    pub manifest_seed: String,
    /// Hex-encoded blake3 digest of the aggregate-phase seed (spec §2.4).
    /// Derived from the group seed and the period start date.
    pub aggregate_seed: String,
    /// Ownership graph: parent + all expanded entities with per-entity seeds.
    pub ownership_graph: OwnershipGraphSection,
    /// Scoping profiles map (raw YAML values forwarded verbatim from config).
    pub scoping_profiles: BTreeMap<String, serde_yaml::Value>,
    /// Chart of accounts master — one CoA per distinct accounting framework.
    pub chart_of_accounts_master: ChartOfAccountsMaster,
    /// FX rate master — closing/average rates for IAS 21 translation.
    pub fx_rate_master: FxRateMaster,
    /// Flat list of all resolved intercompany relationships.
    pub ic_relationships: Vec<ResolvedIcRelationship>,
    /// ISA 600-level audit engagement plan (materiality, scope, component auditors).
    pub audit_engagement_plan: AuditEngagementPlan,
    /// Pillar Two / CbC / transfer-pricing group tax plan.
    pub tax_group_plan: TaxGroupPlan,
    /// **v5.2** — IAS 36 § 10 cash-generating-unit plan: CGU
    /// definitions + acquisition-date goodwill allocations the
    /// aggregate phase tests for impairment. Empty plan when the
    /// engagement supplies no CGU configuration; archives written
    /// before this field existed deserialise to the `Default`
    /// (empty) plan via `#[serde(default)]` — backwards-compatible
    /// byte-for-byte.
    #[serde(default)]
    pub cgu_plan: CguPlan,
    /// Shard assignment plan — entities batched into ~1 TB shards.
    pub shard_plan: ShardPlan,
    /// Output layout config forwarded verbatim from [`GroupConfig::output`].
    pub output: OutputLayoutConfig,
    /// **v5.3** — IC matching strategy + tolerance. Forwarded
    /// verbatim from `GroupConfig::intercompany::matching`. v5.2
    /// archives that don't carry this field deserialise to the
    /// `Default` (`ManifestDriven`, `tolerance = 0`) — exact-match
    /// behaviour preserved byte-for-byte.
    #[serde(default)]
    pub matching: crate::config::IcMatchingConfig,
    /// **v5.31** — raw `defaults:` YAML block from [`GroupConfig`],
    /// forwarded verbatim so per-entity orchestrator configs can read
    /// generator-level settings (e.g. `fraud:`, `anomaly_injection:`,
    /// `distributions:`) that the manifest doesn't otherwise model.
    /// Pre-v5.31 manifests deserialise to `Value::Null` via
    /// `#[serde(default)]` — backwards-compat preserved.
    #[serde(default)]
    pub defaults: serde_yaml::Value,
}
102
/// Resolved engagement period.
///
/// Built by [`compute_period_pub`]: `start` is taken from the period config
/// and `end` is derived from `start` and `length` (start + N months − 1 day).
/// Both bounds are inclusive.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ManifestPeriod {
    /// First day of the period (inclusive).
    pub start: NaiveDate,
    /// Last day of the period (inclusive).
    pub end: NaiveDate,
    /// Period length from config.
    pub length: PeriodLength,
}
113
/// Ownership graph section of the manifest.
///
/// Populated by [`build_manifest`] from the config's ownership block and the
/// result of ownership expansion.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct OwnershipGraphSection {
    /// Top-level parent entity code, copied from the ownership config.
    pub parent_entity_code: String,
    /// All entities (explicit + generated), enriched with `entity_seed` and `shard_id`.
    /// One record per expanded entity, in the order the expansion returned them.
    pub entities: Vec<ManifestEntity>,
}
122
/// An entity record in the manifest.
///
/// Combines the expanded entity data with the per-entity seed (hex-encoded)
/// and shard assignment for direct shard consumption.
///
/// Unless noted otherwise, each field is copied unchanged from the
/// corresponding field of the expanded entity by [`build_manifest`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ManifestEntity {
    /// Entity code. Also the key used to derive `entity_seed` and to look up
    /// `shard_id` in the shard plan.
    pub code: String,
    /// Optional entity name.
    pub name: Option<String>,
    /// Country of the entity (exact code format is defined by the expansion
    /// step, not visible here).
    pub country: String,
    /// Functional currency of the entity.
    pub functional_currency: String,
    /// Name of the scoping profile applied to this entity — presumably a key
    /// of [`GroupManifest::scoping_profiles`]; confirm against the expansion step.
    pub scoping_profile: String,
    /// How this entity is consolidated into the group.
    pub consolidation_method: crate::config::ConsolidationMethod,
    /// Ownership percentage, when known. NOTE(review): ownership-change events
    /// are validated as fractions in `[0, 1]`; presumably this field uses the
    /// same scale — confirm.
    pub ownership_percent: Option<rust_decimal::Decimal>,
    /// Code of the direct parent entity, if any.
    pub parent_code: Option<String>,
    /// Accounting framework identifier, if set for this entity.
    pub accounting_framework: Option<String>,
    /// Industry label, if set for this entity.
    pub industry: Option<String>,
    /// **v5.2** — IAS 29 hyperinflationary status of this entity's
    /// functional currency. `#[serde(default)]` so v5.0 / v5.1
    /// archives without the field deserialise to
    /// `NotHyperinflationary` (the default), preserving backwards
    /// compatibility byte-for-byte.
    #[serde(default)]
    pub hyperinflation_status: datasynth_core::models::HyperinflationStatus,
    /// **v5.2** — IFRS 3 § 41-42 / IFRS 10 § 23 / IFRS 10.B97 mid-period
    /// ownership-change events affecting this entity. Validated by
    /// the manifest builder: each event's `effective_date` must lie
    /// within `[period.start, period.end]` (inclusive), and both the
    /// before/after ownership percentages must lie in `[0, 1]`. Empty for
    /// engagements without ownership changes. `#[serde(default)]`
    /// keeps v5.0–v5.1 archives loading byte-identically.
    #[serde(default)]
    pub ownership_changes: Vec<datasynth_core::models::intercompany::OwnershipChangeEvent>,
    /// Hex-encoded blake3 digest of the per-entity seed (spec §2.4).
    /// Derived from the group seed and this entity's `code`.
    pub entity_seed: String,
    /// Shard identifier assigned by the shard plan (e.g. `"S_SIG_0001"`).
    /// The builder falls back to an empty string when the shard plan has no
    /// entry for this entity's code.
    pub shard_id: String,
}
159
160// ── Top-level builder ─────────────────────────────────────────────────────────
161
162/// Build the complete [`GroupManifest`] from a [`GroupConfig`].
163///
164/// This is the single entry point for the manifest phase. It orchestrates all
165/// Task 2.1–2.8 builders in dependency order:
166///
167/// 1. Period computation (start → end).
168/// 2. Manifest-phase and aggregate-phase seed derivation.
169/// 3. Ownership expansion (explicit + generated entities).
170/// 4. Shard plan — must precede per-entity `shard_id` stamping.
171/// 5. Chart of accounts master.
172/// 6. FX rate master.
173/// 7. Intercompany relationship expansion.
174/// 8. Audit engagement plan.
175/// 9. Tax group plan.
176/// 10. ManifestEntity assembly (entity_seed + shard_id per entity).
177///
178/// # Errors
179/// Propagates all errors from sub-builders. The most common failure modes are:
180/// - [`GroupError::Config`] for invalid period dates, missing materiality config,
181/// or bad FX rate coverage.
182/// - [`GroupError::Manifest`] for CoA load failures.
183pub fn build_manifest(cfg: &GroupConfig) -> GroupResult<GroupManifest> {
184 // ── 1. Resolve the engagement period ────────────────────────────────────
185 let period = compute_period(&cfg.period)?;
186
187 // ── 2. Derive manifest and aggregate seeds ───────────────────────────────
188 let manifest_seed = derive_manifest_seed(cfg.seed, period.start);
189 let aggregate_seed = derive_aggregate_seed(cfg.seed, period.start);
190
191 // ── 3. Expand ownership ──────────────────────────────────────────────────
192 let expanded: Vec<ExpandedEntity> = expand_ownership(&cfg.ownership, cfg.seed, period.start)?;
193
194 // ── 4. Shard plan (must precede per-entity shard_id assignment) ──────────
195 let shard_plan = build_shard_plan(&expanded, &cfg.scoping_profiles)?;
196 let shard_by_code = shard_plan.shard_by_code();
197
198 // ── 5. Chart of accounts master ──────────────────────────────────────────
199 let coa_master = build_coa_master(&expanded, &cfg.defaults, &cfg.id)?;
200
201 // ── 6. FX rate master ────────────────────────────────────────────────────
202 let fx_master = build_fx_master(
203 &cfg.fx,
204 &cfg.presentation_currency,
205 period.start,
206 period.end,
207 &expanded,
208 )?;
209
210 // ── 7. Intercompany relationship expansion ───────────────────────────────
211 let ic_relationships = expand_ic_relationships(&cfg.intercompany, &expanded, cfg.seed)?;
212
213 // ── 8. Audit engagement plan ─────────────────────────────────────────────
214 let audit_engagement_plan =
215 build_audit_engagement_plan(&cfg.audit, &expanded, &cfg.id, &aggregate_seed)?;
216
217 // ── 9. Tax group plan ────────────────────────────────────────────────────
218 let tax_group_plan = build_tax_group_plan(&cfg.tax, &expanded)?;
219
220 // ── 9b. CGU plan (IAS 36 § 10) — empty when not configured ───────────────
221 let cgu_plan = build_cgu_plan(&cfg.cgu, &expanded)?;
222
223 // ── 10. ManifestEntity: enrich each ExpandedEntity ────────────────────────
224 // Also validates that every ownership-change event's effective_date
225 // falls within [period.start, period.end] (inclusive) — events
226 // outside the engagement period are rejected as a config error.
227 let entities: Vec<ManifestEntity> = expanded
228 .iter()
229 .map(|e| {
230 for ev in &e.ownership_changes {
231 if ev.effective_date < period.start || ev.effective_date > period.end {
232 return Err(GroupError::Config(format!(
233 "entity {}: ownership_change effective_date {} is outside the \
234 engagement period [{}, {}]",
235 e.code, ev.effective_date, period.start, period.end,
236 )));
237 }
238 if ev.ownership_percent_before < rust_decimal::Decimal::ZERO
239 || ev.ownership_percent_before > rust_decimal::Decimal::ONE
240 {
241 return Err(GroupError::Config(format!(
242 "entity {}: ownership_percent_before {} is not in [0, 1]",
243 e.code, ev.ownership_percent_before,
244 )));
245 }
246 if ev.ownership_percent_after < rust_decimal::Decimal::ZERO
247 || ev.ownership_percent_after > rust_decimal::Decimal::ONE
248 {
249 return Err(GroupError::Config(format!(
250 "entity {}: ownership_percent_after {} is not in [0, 1]",
251 e.code, ev.ownership_percent_after,
252 )));
253 }
254 }
255 Ok(ManifestEntity {
256 code: e.code.clone(),
257 name: e.name.clone(),
258 country: e.country.clone(),
259 functional_currency: e.functional_currency.clone(),
260 scoping_profile: e.scoping_profile.clone(),
261 consolidation_method: e.consolidation_method,
262 ownership_percent: e.ownership_percent,
263 parent_code: e.parent_code.clone(),
264 accounting_framework: e.accounting_framework.clone(),
265 industry: e.industry.clone(),
266 hyperinflation_status: e.hyperinflation_status,
267 ownership_changes: e.ownership_changes.clone(),
268 entity_seed: hex::encode(derive_entity_seed(cfg.seed, &e.code)),
269 shard_id: shard_by_code
270 .get(e.code.as_str())
271 .cloned()
272 .unwrap_or_default(),
273 })
274 })
275 .collect::<GroupResult<Vec<_>>>()?;
276
277 Ok(GroupManifest {
278 schema_version: MANIFEST_SCHEMA_VERSION.to_string(),
279 group_id: cfg.id.clone(),
280 group_seed: cfg.seed,
281 presentation_currency: cfg.presentation_currency.clone(),
282 period,
283 manifest_seed: hex::encode(manifest_seed),
284 aggregate_seed: hex::encode(aggregate_seed),
285 ownership_graph: OwnershipGraphSection {
286 parent_entity_code: cfg.ownership.parent_entity_code.clone(),
287 entities,
288 },
289 scoping_profiles: cfg.scoping_profiles.clone(),
290 chart_of_accounts_master: coa_master,
291 fx_rate_master: fx_master,
292 ic_relationships,
293 audit_engagement_plan,
294 tax_group_plan,
295 cgu_plan,
296 shard_plan,
297 output: cfg.output.clone(),
298 // v5.3: IC matching strategy + fuzzy tolerance. Defaults
299 // produced by `IcMatchingConfig::default()` match the v5.0
300 // exact-match behaviour byte-for-byte.
301 matching: cfg.intercompany.matching.clone(),
302 // v5.31: raw `defaults:` YAML block forwarded so per-entity
303 // configs can read generator-level settings like `fraud:`.
304 defaults: cfg.defaults.clone(),
305 })
306}
307
308// ── Private helpers ───────────────────────────────────────────────────────────
309
/// Compute the [`ManifestPeriod`] from a [`crate::config::PeriodConfig`].
///
/// Private entry point used by [`build_manifest`]; it delegates directly to
/// [`compute_period_pub`], which holds the actual logic.
///
/// The period `end` date is the last calendar day *within* the period:
/// - Monthly → start + 1 month − 1 day
/// - Quarterly → start + 3 months − 1 day
/// - SemiAnnual → start + 6 months − 1 day
/// - Annual → start + 12 months − 1 day
///
/// # Errors
/// Returns whatever [`compute_period_pub`] returns.
fn compute_period(cfg: &crate::config::PeriodConfig) -> GroupResult<ManifestPeriod> {
    compute_period_pub(cfg)
}
320
321/// Public re-export of period computation for integration testing.
322///
323/// Exposed so `tests/manifest_builder.rs` can exercise period-end edge cases
324/// (leap-year February, quarter boundaries, etc.) without going through the
325/// full `build_manifest` stack.
326pub fn compute_period_pub(cfg: &crate::config::PeriodConfig) -> GroupResult<ManifestPeriod> {
327 let start = cfg.start_date;
328 let end = match cfg.length {
329 PeriodLength::Monthly => start.checked_add_months(chrono::Months::new(1)),
330 PeriodLength::Quarterly => start.checked_add_months(chrono::Months::new(3)),
331 PeriodLength::SemiAnnual => start.checked_add_months(chrono::Months::new(6)),
332 PeriodLength::Annual => start.checked_add_months(chrono::Months::new(12)),
333 }
334 .and_then(|d| d.pred_opt())
335 .ok_or_else(|| {
336 GroupError::Config(format!(
337 "invalid period start_date '{}' — cannot compute period end",
338 start
339 ))
340 })?;
341
342 Ok(ManifestPeriod {
343 start,
344 end,
345 length: cfg.length,
346 })
347}