datasynth_core/industry_packs.rs
1//! Industry-specific account-pack definitions.
2//!
3//! v5.7.0 — adds sector-specific sub-account expansion to the chart of
4//! accounts. Real-world ERPs decompose a canonical control account
5//! (e.g. `4000` Product Revenue) into many product-line / channel /
6//! cost-center sub-accounts (`400010` Steel Products, `400020` Aluminum
7//! Components, …). Synthetic data without that decomposition stands out
8//! as obviously synthetic — flat consecutive numbering with one record
9//! per canonical category.
10//!
11//! Packs are YAML files embedded via `include_str!` — zero-I/O at
12//! runtime, all five industries fit in <30 KB total.
13//!
14//! ## Sub-account number convention
15//!
16//! Sub-account numbers are formed by concatenating the parent's 4-digit
17//! canonical account number with a 2-digit suffix → 6-digit:
18//! `parent="4000" + suffix="10" → "400010"`. Real-world ERPs (especially
19//! SAP-FI) routinely mix 4- and 6-digit accounts in this way — the
20//! 4-digit codes act as control accounts (parent / GL-summary level)
21//! and the 6-digit codes are the detail postings.
22//!
23//! Suffixes are deliberately not consecutive: gaps simulate retired,
24//! migrated, or reserved suffixes that real COAs accumulate over time.
25//!
26//! ## Picker semantics
27//!
28//! The `weight` on each sub-account drives the deterministic-by-document
29//! picker exposed via `ChartOfAccounts::pick_subaccount_for_document`
30//! (in `models::chart_of_accounts`). Higher weight = more likely.
31//! Hashing on `document_id` keeps every regeneration of the same dataset
32//! byte-identical.
33
34use serde::{Deserialize, Serialize};
35
36use crate::models::IndustrySector;
37
38// ──────────────────────────────────────────────────────────────────────
39// Embedded pack sources
40// ──────────────────────────────────────────────────────────────────────
41
42const PACK_MANUFACTURING: &str = include_str!("industry_packs/manufacturing.yaml");
43const PACK_RETAIL: &str = include_str!("industry_packs/retail.yaml");
44const PACK_FINANCIAL_SERVICES: &str = include_str!("industry_packs/financial_services.yaml");
45const PACK_HEALTHCARE: &str = include_str!("industry_packs/healthcare.yaml");
46const PACK_TECHNOLOGY: &str = include_str!("industry_packs/technology.yaml");
47
48// ──────────────────────────────────────────────────────────────────────
49// Types
50// ──────────────────────────────────────────────────────────────────────
51
52/// One sub-account within a canonical-account expansion.
53#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct SubAccountSpec {
55 /// 2-character digit suffix appended to the parent's 4-digit
56 /// canonical account number. Must be `0`-`99` formatted as 2 digits.
57 pub suffix: String,
58 /// Descriptive name suffix; full sub-account name is rendered as
59 /// `"<parent_name> — <name>"`.
60 pub name: String,
61 /// Picker weight (relative; higher = more likely to be selected
62 /// for a given document under the deterministic-by-document picker).
63 pub weight: f64,
64}
65
66/// One canonical-account expansion: a parent canonical account number
67/// plus the list of sub-accounts that should fan out from it when
68/// industry-pack expansion is enabled.
69#[derive(Debug, Clone, Serialize, Deserialize)]
70pub struct AccountExpansion {
71 /// 4-digit canonical account number from
72 /// [`crate::accounts`] (e.g. `"4000"` for `PRODUCT_REVENUE`).
73 pub parent_account: String,
74 /// Human name of the parent. Used as the prefix when rendering
75 /// sub-account names.
76 pub parent_name: String,
77 /// Sub-accounts that fan out from the parent.
78 pub sub_accounts: Vec<SubAccountSpec>,
79}
80
81/// A complete industry pack — parsed from one of the embedded YAML files.
82#[derive(Debug, Clone, Serialize, Deserialize)]
83pub struct IndustryPack {
84 /// Industry identifier (matches [`IndustrySector`] in snake_case).
85 pub industry: String,
86 /// Human description.
87 pub description: String,
88 /// Pack schema version.
89 #[serde(default = "default_version")]
90 pub version: u32,
91 /// All canonical-account expansions defined by this pack.
92 pub expansions: Vec<AccountExpansion>,
93}
94
/// Schema version assumed for a pack whose YAML has no `version` key.
fn default_version() -> u32 {
    const INITIAL_SCHEMA_VERSION: u32 = 1;
    INITIAL_SCHEMA_VERSION
}
98
99impl IndustryPack {
100 /// Parse a pack from a YAML string.
101 pub fn parse(yaml: &str) -> Result<Self, String> {
102 serde_yaml::from_str(yaml).map_err(|e| format!("industry pack parse error: {e}"))
103 }
104
105 /// Look up an expansion for a given canonical parent account.
106 pub fn expansion_for(&self, parent_account: &str) -> Option<&AccountExpansion> {
107 self.expansions
108 .iter()
109 .find(|e| e.parent_account == parent_account)
110 }
111}
112
113/// Return the embedded YAML source for `sector`, if a pack exists.
114pub fn raw_pack_for(sector: IndustrySector) -> Option<&'static str> {
115 match sector {
116 IndustrySector::Manufacturing => Some(PACK_MANUFACTURING),
117 IndustrySector::Retail => Some(PACK_RETAIL),
118 IndustrySector::FinancialServices => Some(PACK_FINANCIAL_SERVICES),
119 IndustrySector::Healthcare => Some(PACK_HEALTHCARE),
120 IndustrySector::Technology => Some(PACK_TECHNOLOGY),
121 // v5.7.0 MVP ships 5 packs; remaining sectors fall through to
122 // None (no expansion) — packs can be added without API change.
123 IndustrySector::ProfessionalServices
124 | IndustrySector::Energy
125 | IndustrySector::Transportation
126 | IndustrySector::RealEstate
127 | IndustrySector::Telecommunications => None,
128 }
129}
130
131/// Load and parse the pack for `sector`. Returns `Ok(None)` if no pack
132/// is shipped for that sector (no expansion will happen).
133pub fn load_pack(sector: IndustrySector) -> Result<Option<IndustryPack>, String> {
134 match raw_pack_for(sector) {
135 Some(yaml) => IndustryPack::parse(yaml).map(Some),
136 None => Ok(None),
137 }
138}
139
/// Build a sub-account number by joining a normalised parent and suffix.
///
/// * `parent` is cut to its first four characters and, if shorter,
///   left-padded with `0` up to four characters.
/// * `suffix` is left-padded with `0` up to two characters. A suffix
///   longer than two characters is passed through unchanged, so the
///   result is then longer than six characters.
///
/// For example `("4000", "10")` gives `"400010"` and `("60", "5")`
/// gives `"006005"`.
pub fn render_sub_account_number(parent: &str, suffix: &str) -> String {
    // Byte offset just past the fourth character; the whole string when
    // `parent` holds four characters or fewer.
    let cut = parent
        .char_indices()
        .nth(4)
        .map_or(parent.len(), |(offset, _)| offset);
    let head = &parent[..cut];
    format!("{head:0>4}{suffix:0>2}")
}
151
/// Build the display name of a sub-account: the parent name and the
/// sub-account name joined by an em dash with a space on either side.
pub fn render_sub_account_name(parent_name: &str, sub_name: &str) -> String {
    [parent_name, sub_name].join(" — ")
}
156
157// ──────────────────────────────────────────────────────────────────────
158// Tests
159// ──────────────────────────────────────────────────────────────────────
160
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::BTreeSet;

    /// Sectors for which a pack is expected to be embedded.
    const SHIPPED_SECTORS: [IndustrySector; 5] = [
        IndustrySector::Manufacturing,
        IndustrySector::Retail,
        IndustrySector::FinancialServices,
        IndustrySector::Healthcare,
        IndustrySector::Technology,
    ];

    /// Assert the per-sub-account invariants: 2-digit suffix, positive
    /// weight, non-empty name.
    fn check_sub_account(sector: IndustrySector, sub: &SubAccountSpec) {
        assert_eq!(
            sub.suffix.len(),
            2,
            "{:?} sub suffix {:?} should be 2 chars",
            sector,
            sub.suffix
        );
        assert!(
            sub.suffix.chars().all(|c| c.is_ascii_digit()),
            "{:?} sub suffix {:?} should be digits",
            sector,
            sub.suffix
        );
        assert!(
            sub.weight > 0.0,
            "{:?} sub {:?} weight must be positive",
            sector,
            sub.name
        );
        assert!(!sub.name.is_empty(), "{:?} sub name empty", sector);
    }

    /// Assert the per-expansion invariants: 4-digit parent, at least two
    /// sub-accounts, each sub-account well formed, suffixes unique.
    fn check_expansion(sector: IndustrySector, exp: &AccountExpansion) {
        assert_eq!(
            exp.parent_account.len(),
            4,
            "{:?} parent {:?} should be 4 digits",
            sector,
            exp.parent_account
        );
        assert!(
            exp.parent_account.chars().all(|c| c.is_ascii_digit()),
            "{:?} parent {:?} should be all digits",
            sector,
            exp.parent_account
        );
        assert!(
            exp.sub_accounts.len() >= 2,
            "{:?} parent {} should have ≥2 sub-accounts",
            sector,
            exp.parent_account
        );

        for sub in &exp.sub_accounts {
            check_sub_account(sector, sub);
        }

        // Suffixes must be unique within an expansion: the set of
        // distinct suffixes has to be as large as the list itself.
        let distinct: BTreeSet<&str> = exp
            .sub_accounts
            .iter()
            .map(|sub| sub.suffix.as_str())
            .collect();
        assert_eq!(
            exp.sub_accounts.len(),
            distinct.len(),
            "{:?} parent {} has duplicate suffixes",
            sector,
            exp.parent_account
        );
    }

    /// Every shipped pack must parse, declare a non-empty industry name,
    /// and contain at least one expansion with at least two sub-accounts.
    #[test]
    fn all_shipped_packs_parse_and_are_well_formed() {
        for sector in SHIPPED_SECTORS {
            let pack = match load_pack(sector) {
                Ok(Some(pack)) => pack,
                Ok(None) => panic!("{:?} pack should be shipped", sector),
                Err(e) => panic!("{:?} pack failed to load: {e}", sector),
            };

            assert!(!pack.industry.is_empty(), "{:?}: industry empty", sector);
            assert!(
                !pack.expansions.is_empty(),
                "{:?}: at least one expansion required",
                sector
            );

            for exp in &pack.expansions {
                check_expansion(sector, exp);
            }
        }
    }

    /// Sectors without an embedded pack load as `Ok(None)`, not an error.
    #[test]
    fn unsupported_sectors_return_none() {
        assert!(matches!(load_pack(IndustrySector::Energy), Ok(None)));
        assert!(matches!(
            load_pack(IndustrySector::ProfessionalServices),
            Ok(None)
        ));
    }

    /// Parent and suffix are zero-padded before being concatenated.
    #[test]
    fn render_sub_account_number_concatenates_padded() {
        let cases = [
            ("4000", "10", "400010"),
            ("4000", "5", "400005"),
            ("60", "10", "006010"),
        ];
        for (parent, suffix, expected) in cases {
            assert_eq!(render_sub_account_number(parent, suffix), expected);
        }
    }

    /// Parent and sub-account names are joined by a spaced em dash.
    #[test]
    fn render_sub_account_name_uses_em_dash() {
        let rendered = render_sub_account_name("Product Revenue", "Steel Products");
        assert_eq!(rendered, "Product Revenue — Steel Products");
    }

    /// The manufacturing pack splits product revenue (`4000`) into at
    /// least a steel and an aluminum sub-account.
    #[test]
    fn manufacturing_has_expected_revenue_split() {
        let pack = load_pack(IndustrySector::Manufacturing).unwrap().unwrap();
        let revenue = pack
            .expansion_for("4000")
            .expect("manufacturing must expand 4000");
        for keyword in ["Steel", "Aluminum"] {
            assert!(revenue
                .sub_accounts
                .iter()
                .any(|sub| sub.name.contains(keyword)));
        }
    }
}
278}