Skip to main content

datasynth_core/
industry_packs.rs

1//! Industry-specific account-pack definitions.
2//!
3//! v5.7.0 — adds sector-specific sub-account expansion to the chart of
4//! accounts. Real-world ERPs decompose a canonical control account
5//! (e.g. `4000` Product Revenue) into many product-line / channel /
6//! cost-center sub-accounts (`400010` Steel Products, `400020` Aluminum
7//! Components, …). Synthetic data without that decomposition stands out
8//! as obviously synthetic — flat consecutive numbering with one record
9//! per canonical category.
10//!
11//! Packs are YAML files embedded via `include_str!` — zero-I/O at
12//! runtime, all five industries fit in <30 KB total.
13//!
14//! ## Sub-account number convention
15//!
16//! Sub-account numbers are formed by concatenating the parent's 4-digit
17//! canonical account number with a 2-digit suffix → 6-digit:
18//! `parent="4000" + suffix="10" → "400010"`. Real-world ERPs (especially
19//! SAP-FI) routinely mix 4- and 6-digit accounts in this way — the
20//! 4-digit codes act as control accounts (parent / GL-summary level)
21//! and the 6-digit codes are the detail postings.
22//!
23//! Suffixes are deliberately not consecutive: gaps simulate retired,
24//! migrated, or reserved suffixes that real COAs accumulate over time.
25//!
26//! ## Picker semantics
27//!
28//! The `weight` on each sub-account drives the deterministic-by-document
29//! picker exposed via `ChartOfAccounts::pick_subaccount_for_document`
30//! (in `models::chart_of_accounts`). Higher weight = more likely.
31//! Hashing on `document_id` keeps every regeneration of the same dataset
32//! byte-identical.
33
34use serde::{Deserialize, Serialize};
35
36use crate::models::IndustrySector;
37
38// ──────────────────────────────────────────────────────────────────────
39// Embedded pack sources
40// ──────────────────────────────────────────────────────────────────────
41
/// YAML source of the manufacturing pack, embedded at compile time.
const PACK_MANUFACTURING: &str = include_str!("industry_packs/manufacturing.yaml");
/// YAML source of the retail pack, embedded at compile time.
const PACK_RETAIL: &str = include_str!("industry_packs/retail.yaml");
/// YAML source of the financial-services pack, embedded at compile time.
const PACK_FINANCIAL_SERVICES: &str = include_str!("industry_packs/financial_services.yaml");
/// YAML source of the healthcare pack, embedded at compile time.
const PACK_HEALTHCARE: &str = include_str!("industry_packs/healthcare.yaml");
/// YAML source of the technology pack, embedded at compile time.
const PACK_TECHNOLOGY: &str = include_str!("industry_packs/technology.yaml");
47
48// ──────────────────────────────────────────────────────────────────────
49// Types
50// ──────────────────────────────────────────────────────────────────────
51
/// One sub-account within a canonical-account expansion.
///
/// Instances are the elements of [`AccountExpansion::sub_accounts`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubAccountSpec {
    /// Two-digit suffix (`"00"`–`"99"`) appended to the parent's 4-digit
    /// canonical account number to form the 6-digit sub-account number.
    /// The format is not validated when a pack is parsed; the shipped
    /// packs are checked by this module's tests.
    pub suffix: String,
    /// Descriptive name suffix; full sub-account name is rendered as
    /// `"<parent_name> — <name>"`.
    pub name: String,
    /// Picker weight (relative; higher = more likely to be selected
    /// for a given document under the deterministic-by-document picker).
    /// Shipped packs are tested to use strictly positive weights.
    pub weight: f64,
}
65
/// One canonical-account expansion: a parent canonical account number
/// plus the list of sub-accounts that should fan out from it when
/// industry-pack expansion is enabled.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AccountExpansion {
    /// 4-digit canonical account number from
    /// [`crate::accounts`] (e.g. `"4000"` for `PRODUCT_REVENUE`).
    /// This is the key matched by [`IndustryPack::expansion_for`].
    pub parent_account: String,
    /// Human name of the parent. Used as the prefix when rendering
    /// sub-account names.
    pub parent_name: String,
    /// Sub-accounts that fan out from the parent. Shipped packs are
    /// tested to declare at least two per expansion, with unique
    /// suffixes.
    pub sub_accounts: Vec<SubAccountSpec>,
}
80
/// A complete industry pack — parsed from one of the embedded YAML files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndustryPack {
    /// Industry identifier (matches [`IndustrySector`] in snake_case).
    /// Stored as free text; it is not cross-checked against the enum
    /// when the pack is parsed.
    pub industry: String,
    /// Human description.
    pub description: String,
    /// Pack schema version. Falls back to `1` (via `default_version`)
    /// when the YAML omits the `version` key.
    #[serde(default = "default_version")]
    pub version: u32,
    /// All canonical-account expansions defined by this pack.
    pub expansions: Vec<AccountExpansion>,
}
94
/// Serde fallback for the pack `version` field: a pack that does not
/// declare a version is treated as schema version 1.
fn default_version() -> u32 {
    const INITIAL_SCHEMA_VERSION: u32 = 1;
    INITIAL_SCHEMA_VERSION
}
98
99impl IndustryPack {
100    /// Parse a pack from a YAML string.
101    pub fn parse(yaml: &str) -> Result<Self, String> {
102        serde_yaml::from_str(yaml).map_err(|e| format!("industry pack parse error: {e}"))
103    }
104
105    /// Look up an expansion for a given canonical parent account.
106    pub fn expansion_for(&self, parent_account: &str) -> Option<&AccountExpansion> {
107        self.expansions
108            .iter()
109            .find(|e| e.parent_account == parent_account)
110    }
111}
112
113/// Return the embedded YAML source for `sector`, if a pack exists.
114pub fn raw_pack_for(sector: IndustrySector) -> Option<&'static str> {
115    match sector {
116        IndustrySector::Manufacturing => Some(PACK_MANUFACTURING),
117        IndustrySector::Retail => Some(PACK_RETAIL),
118        IndustrySector::FinancialServices => Some(PACK_FINANCIAL_SERVICES),
119        IndustrySector::Healthcare => Some(PACK_HEALTHCARE),
120        IndustrySector::Technology => Some(PACK_TECHNOLOGY),
121        // v5.7.0 MVP ships 5 packs; remaining sectors fall through to
122        // None (no expansion) — packs can be added without API change.
123        IndustrySector::ProfessionalServices
124        | IndustrySector::Energy
125        | IndustrySector::Transportation
126        | IndustrySector::RealEstate
127        | IndustrySector::Telecommunications => None,
128    }
129}
130
131/// Load and parse the pack for `sector`. Returns `Ok(None)` if no pack
132/// is shipped for that sector (no expansion will happen).
133pub fn load_pack(sector: IndustrySector) -> Result<Option<IndustryPack>, String> {
134    match raw_pack_for(sector) {
135        Some(yaml) => IndustryPack::parse(yaml).map(Some),
136        None => Ok(None),
137    }
138}
139
/// Build the full sub-account number from a parent number and a suffix.
///
/// * `parent` contributes at most its first four characters and is
///   left-padded with `0` up to a width of four.
/// * `suffix` is left-padded with `0` up to a width of two; a longer
///   suffix is passed through unchanged.
///
/// The two parts are concatenated, e.g. `("4000", "10")` → `"400010"`
/// and `("60", "5")` → `"006005"`.
pub fn render_sub_account_number(parent: &str, suffix: &str) -> String {
    // Byte offset where the fifth character starts, or the end of the
    // string when there are four characters or fewer.
    let cut = parent
        .char_indices()
        .nth(4)
        .map_or(parent.len(), |(offset, _)| offset);
    let head = &parent[..cut];
    format!("{head:0>4}{suffix:0>2}")
}
151
/// Build the display name of a sub-account: the parent name and the
/// sub-account name joined by an em dash surrounded by single spaces.
pub fn render_sub_account_name(parent_name: &str, sub_name: &str) -> String {
    [parent_name, sub_name].join(" — ")
}
156
157// ──────────────────────────────────────────────────────────────────────
158// Tests
159// ──────────────────────────────────────────────────────────────────────
160
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use super::*;

    /// Sectors that ship an embedded pack.
    const SHIPPED_SECTORS: [IndustrySector; 5] = [
        IndustrySector::Manufacturing,
        IndustrySector::Retail,
        IndustrySector::FinancialServices,
        IndustrySector::Healthcare,
        IndustrySector::Technology,
    ];

    /// Sectors for which no pack is shipped.
    const UNSHIPPED_SECTORS: [IndustrySector; 5] = [
        IndustrySector::ProfessionalServices,
        IndustrySector::Energy,
        IndustrySector::Transportation,
        IndustrySector::RealEstate,
        IndustrySector::Telecommunications,
    ];

    /// Check one sub-account: 2-digit suffix, usable weight, non-empty name.
    fn assert_sub_account_well_formed(sector: IndustrySector, sub: &SubAccountSpec) {
        assert_eq!(
            sub.suffix.len(),
            2,
            "{:?} sub suffix {:?} should be 2 chars",
            sector,
            sub.suffix
        );
        assert!(
            sub.suffix.chars().all(|c| c.is_ascii_digit()),
            "{:?} sub suffix {:?} should be digits",
            sector,
            sub.suffix
        );
        // `> 0.0` alone already rejects NaN but would accept +inf, which
        // is not a meaningful relative weight.
        assert!(
            sub.weight.is_finite() && sub.weight > 0.0,
            "{:?} sub {:?} weight must be positive and finite",
            sector,
            sub.name
        );
        assert!(!sub.name.is_empty(), "{:?} sub name empty", sector);
    }

    /// Check one expansion: 4-digit parent, at least two sub-accounts,
    /// each well-formed, and no suffix used twice.
    fn assert_expansion_well_formed(sector: IndustrySector, exp: &AccountExpansion) {
        assert_eq!(
            exp.parent_account.len(),
            4,
            "{:?} parent {:?} should be 4 digits",
            sector,
            exp.parent_account
        );
        assert!(
            exp.parent_account.chars().all(|c| c.is_ascii_digit()),
            "{:?} parent {:?} should be all digits",
            sector,
            exp.parent_account
        );
        assert!(
            exp.sub_accounts.len() >= 2,
            "{:?} parent {} should have ≥2 sub-accounts",
            sector,
            exp.parent_account
        );

        let mut seen_suffixes = HashSet::new();
        for sub in &exp.sub_accounts {
            assert_sub_account_well_formed(sector, sub);
            // Suffixes must be unique within an expansion.
            assert!(
                seen_suffixes.insert(sub.suffix.as_str()),
                "{:?} parent {} has duplicate suffixes ({:?})",
                sector,
                exp.parent_account,
                sub.suffix
            );
        }
    }

    /// Every shipped pack must parse, declare a non-empty industry name,
    /// and contain at least one expansion with at least two sub-accounts.
    #[test]
    fn all_shipped_packs_parse_and_are_well_formed() {
        for sector in SHIPPED_SECTORS {
            let pack = load_pack(sector)
                .unwrap_or_else(|e| panic!("{:?} pack failed to load: {e}", sector))
                .unwrap_or_else(|| panic!("{:?} pack should be shipped", sector));
            assert!(!pack.industry.is_empty(), "{:?}: industry empty", sector);
            assert!(
                !pack.expansions.is_empty(),
                "{:?}: at least one expansion required",
                sector
            );
            for exp in &pack.expansions {
                assert_expansion_well_formed(sector, exp);
            }
        }
    }

    /// Every sector without a pack yields `Ok(None)` from `load_pack`
    /// and `None` from `raw_pack_for`.
    #[test]
    fn unsupported_sectors_return_none() {
        for sector in UNSHIPPED_SECTORS {
            assert!(
                raw_pack_for(sector).is_none(),
                "{:?} should have no embedded pack",
                sector
            );
            assert!(
                matches!(load_pack(sector), Ok(None)),
                "{:?} should load as Ok(None)",
                sector
            );
        }
    }

    #[test]
    fn render_sub_account_number_concatenates_padded() {
        assert_eq!(render_sub_account_number("4000", "10"), "400010");
        assert_eq!(render_sub_account_number("4000", "5"), "400005");
        assert_eq!(render_sub_account_number("60", "10"), "006010");
    }

    /// A parent longer than four characters contributes only its first four.
    #[test]
    fn render_sub_account_number_truncates_long_parent() {
        assert_eq!(render_sub_account_number("400099", "10"), "400010");
    }

    #[test]
    fn render_sub_account_name_uses_em_dash() {
        assert_eq!(
            render_sub_account_name("Product Revenue", "Steel Products"),
            "Product Revenue — Steel Products"
        );
    }

    #[test]
    fn manufacturing_has_expected_revenue_split() {
        let pack = load_pack(IndustrySector::Manufacturing).unwrap().unwrap();
        let rev = pack
            .expansion_for("4000")
            .expect("manufacturing must expand 4000");
        let names: Vec<&str> = rev.sub_accounts.iter().map(|s| s.name.as_str()).collect();
        assert!(names.iter().any(|n| n.contains("Steel")));
        assert!(names.iter().any(|n| n.contains("Aluminum")));
    }

    /// A key that cannot be a valid parent (not 4 digits) never matches.
    #[test]
    fn expansion_for_unknown_parent_is_none() {
        let pack = load_pack(IndustrySector::Manufacturing).unwrap().unwrap();
        assert!(pack.expansion_for("not-an-account").is_none());
    }

    /// Omitting `version` in the YAML falls back to schema version 1.
    #[test]
    fn version_defaults_to_one_when_omitted() {
        let pack = IndustryPack::parse("industry: test\ndescription: test pack\nexpansions: []\n")
            .expect("minimal pack should parse");
        assert_eq!(pack.version, 1);
        assert!(pack.expansions.is_empty());
    }

    /// Parse failures carry the documented error prefix.
    #[test]
    fn parse_error_is_prefixed() {
        let err = IndustryPack::parse("industry: test\n")
            .expect_err("a pack missing required fields must be rejected");
        assert!(
            err.starts_with("industry pack parse error: "),
            "unexpected error text: {err}"
        );
    }
}