Skip to main content

datasynth_core/models/
dimensional_export.rs

1//! Dimensional (star-schema) export — a surrogate-key-normalized view of the
2//! general ledger for loading into common GL-analytics platforms.
3//!
4//! Produces a **fact table** of journal-entry lines, where every dimension is an
5//! integer surrogate key, plus a **lookup table per dimension** (key → value) and
6//! a **chart-of-accounts map** (account key → account + description). Keys are
7//! assigned deterministically (sorted distinct value order) so the same ledger
8//! always yields the same encoding.
9//!
10//! Generic by design — no tool/vendor/form-specific naming.
11
12use std::collections::{BTreeMap, BTreeSet};
13
14use super::JournalEntry;
15
/// A single fact row: one journal-entry line with its dimension keys.
///
/// Every `*_key` field is a surrogate key into the matching dimension table of
/// [`DimensionalExport`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FactRow {
    /// Document id of the owning journal entry, rendered as a string.
    pub je_id: String,
    /// Line number of this line within its journal entry.
    pub line_number: u32,
    /// Signed amount (debit positive, credit negative), as a string.
    pub amount: String,
    /// Debit/Credit key (1 = Debit, 2 = Credit).
    pub drcr_key: u32,
    /// Key of the line's GL account in the chart-of-accounts dimension.
    pub account_key: u32,
    /// Key of the entry's source value in the source dimension.
    pub source_key: u32,
    /// Key of the entry's creator in the preparer dimension.
    pub preparer_key: u32,
    /// Key of the entry's company code in the company dimension.
    pub company_key: u32,
    /// Key of the entry's currency in the currency dimension.
    pub currency_key: u32,
    /// Key of the entry's document type in the document-type dimension.
    pub doc_type_key: u32,
    /// Key of the entry's posting date in the date dimension.
    pub date_key: u32,
}
32
/// A chart-of-accounts dimension row.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AccountDimRow {
    /// Surrogate key referenced by [`FactRow::account_key`].
    pub key: u32,
    /// The GL account identifier this key stands for.
    pub gl_account: String,
    /// Description of the account; empty when none was available.
    pub account_description: String,
}
40
/// The full dimensional export: fact table + dimension lookups + COA map.
///
/// Each `(u32, String)` dimension is a list of `(key, value)` pairs; the keys
/// are the ones referenced by the corresponding `*_key` field of [`FactRow`].
#[derive(Debug, Clone)]
pub struct DimensionalExport {
    /// Fact table: one row per journal-entry line.
    pub fact: Vec<FactRow>,
    /// Debit/Credit dimension (fixed: 1 = Debit, 2 = Credit).
    pub drcr: Vec<(u32, String)>,
    /// Chart-of-accounts dimension: account key → GL account + description.
    pub accounts: Vec<AccountDimRow>,
    /// Source dimension (SAP source code, or the transaction-source kind).
    pub sources: Vec<(u32, String)>,
    /// Preparer dimension (the `created_by` value of each entry header).
    pub preparers: Vec<(u32, String)>,
    /// Company dimension (company codes).
    pub companies: Vec<(u32, String)>,
    /// Currency dimension.
    pub currencies: Vec<(u32, String)>,
    /// Document-type dimension.
    pub doc_types: Vec<(u32, String)>,
    /// Date dimension (posting dates rendered as strings).
    pub dates: Vec<(u32, String)>,
}
55
/// Assigns each distinct value a deterministic 1-based key.
///
/// Keys follow the set's sorted iteration order, so the smallest value gets
/// key 1. Returns both directions of the mapping: a `value → key` lookup and
/// the `(key, value)` rows in ascending key order.
fn intern(values: BTreeSet<String>) -> (BTreeMap<String, u32>, Vec<(u32, String)>) {
    // Number the values first; the rows come out already ordered by key.
    let rows: Vec<(u32, String)> = values
        .into_iter()
        .enumerate()
        .map(|(position, value)| ((position + 1) as u32, value))
        .collect();

    // Derive the reverse lookup from the finished rows.
    let lookup: BTreeMap<String, u32> = rows
        .iter()
        .map(|(key, value)| (value.clone(), *key))
        .collect();

    (lookup, rows)
}
67
/// Renders `s` as a single CSV field.
///
/// A value containing a comma, a double quote, or a line break (`\n` / `\r`)
/// is wrapped in double quotes with each embedded quote doubled; any other
/// value is returned unchanged.
fn csv_escape(s: &str) -> String {
    let needs_quoting = s.chars().any(|c| matches!(c, ',' | '"' | '\n' | '\r'));
    if !needs_quoting {
        return s.to_owned();
    }

    // Two extra bytes for the surrounding quotes; doubled quotes may grow it further.
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    for c in s.chars() {
        if c == '"' {
            quoted.push('"');
        }
        quoted.push(c);
    }
    quoted.push('"');
    quoted
}
75
76impl DimensionalExport {
77    /// The source value used for the source dimension: the SAP source code when
78    /// present, else the transaction-source kind.
79    fn source_value(je: &JournalEntry) -> String {
80        je.header
81            .sap_source_code
82            .clone()
83            .unwrap_or_else(|| format!("{:?}", je.header.source))
84    }
85
86    /// Build the dimensional export from journal entries.
87    pub fn from_entries(entries: &[JournalEntry]) -> Self {
88        // Pass 1: collect distinct dimension values.
89        let mut accounts: BTreeMap<String, String> = BTreeMap::new(); // gl_account -> description
90        let mut sources = BTreeSet::new();
91        let mut preparers = BTreeSet::new();
92        let mut companies = BTreeSet::new();
93        let mut currencies = BTreeSet::new();
94        let mut doc_types = BTreeSet::new();
95        let mut dates = BTreeSet::new();
96
97        for je in entries {
98            sources.insert(Self::source_value(je));
99            preparers.insert(je.header.created_by.clone());
100            companies.insert(je.header.company_code.clone());
101            currencies.insert(je.header.currency.clone());
102            doc_types.insert(je.header.document_type.clone());
103            dates.insert(je.header.posting_date.to_string());
104            for line in &je.lines {
105                accounts
106                    .entry(line.gl_account.clone())
107                    .or_insert_with(|| line.account_description.clone().unwrap_or_default());
108            }
109        }
110
111        // Assign keys.
112        let (acct_keys, account_rows): (BTreeMap<String, u32>, Vec<AccountDimRow>) = {
113            let mut map = BTreeMap::new();
114            let mut rows = Vec::with_capacity(accounts.len());
115            for (i, (acct, desc)) in accounts.into_iter().enumerate() {
116                let key = (i + 1) as u32;
117                map.insert(acct.clone(), key);
118                rows.push(AccountDimRow {
119                    key,
120                    gl_account: acct,
121                    account_description: desc,
122                });
123            }
124            (map, rows)
125        };
126        let (src_keys, source_rows) = intern(sources);
127        let (prep_keys, preparer_rows) = intern(preparers);
128        let (comp_keys, company_rows) = intern(companies);
129        let (cur_keys, currency_rows) = intern(currencies);
130        let (dt_keys, doc_type_rows) = intern(doc_types);
131        let (date_keys, date_rows) = intern(dates);
132
133        // Pass 2: build the fact table.
134        let mut fact = Vec::new();
135        for je in entries {
136            let src_key = src_keys[&Self::source_value(je)];
137            let prep_key = prep_keys[&je.header.created_by];
138            let comp_key = comp_keys[&je.header.company_code];
139            let cur_key = cur_keys[&je.header.currency];
140            let dt_key = dt_keys[&je.header.document_type];
141            let date_key = date_keys[&je.header.posting_date.to_string()];
142            let je_id = je.header.document_id.to_string();
143            for line in &je.lines {
144                let is_debit = line.debit_amount > rust_decimal::Decimal::ZERO
145                    || (line.credit_amount.is_zero() && line.debit_amount >= line.credit_amount);
146                let (drcr_key, amount) = if line.debit_amount > line.credit_amount {
147                    (1u32, line.debit_amount)
148                } else {
149                    (2u32, -line.credit_amount)
150                };
151                let _ = is_debit;
152                fact.push(FactRow {
153                    je_id: je_id.clone(),
154                    line_number: line.line_number,
155                    amount: amount.to_string(),
156                    drcr_key,
157                    account_key: acct_keys[&line.gl_account],
158                    source_key: src_key,
159                    preparer_key: prep_key,
160                    company_key: comp_key,
161                    currency_key: cur_key,
162                    doc_type_key: dt_key,
163                    date_key,
164                });
165            }
166        }
167
168        Self {
169            fact,
170            drcr: vec![(1, "Debit".into()), (2, "Credit".into())],
171            accounts: account_rows,
172            sources: source_rows,
173            preparers: preparer_rows,
174            companies: company_rows,
175            currencies: currency_rows,
176            doc_types: doc_type_rows,
177            dates: date_rows,
178        }
179    }
180
181    /// CSV for the fact table.
182    pub fn fact_csv(&self) -> String {
183        let mut s = String::from(
184            "je_id,line_number,amount,drcr_key,account_key,source_key,preparer_key,company_key,currency_key,doc_type_key,date_key\n",
185        );
186        for r in &self.fact {
187            s.push_str(&format!(
188                "{},{},{},{},{},{},{},{},{},{},{}\n",
189                csv_escape(&r.je_id),
190                r.line_number,
191                r.amount,
192                r.drcr_key,
193                r.account_key,
194                r.source_key,
195                r.preparer_key,
196                r.company_key,
197                r.currency_key,
198                r.doc_type_key,
199                r.date_key,
200            ));
201        }
202        s
203    }
204
205    /// CSV for the chart-of-accounts dimension.
206    pub fn account_dim_csv(&self) -> String {
207        let mut s = String::from("account_key,gl_account,account_description\n");
208        for r in &self.accounts {
209            s.push_str(&format!(
210                "{},{},{}\n",
211                r.key,
212                csv_escape(&r.gl_account),
213                csv_escape(&r.account_description)
214            ));
215        }
216        s
217    }
218
219    /// CSV for a simple `(key, value)` dimension with the given column headers.
220    pub fn simple_dim_csv(rows: &[(u32, String)], key_col: &str, val_col: &str) -> String {
221        let mut s = format!("{key_col},{val_col}\n");
222        for (k, v) in rows {
223            s.push_str(&format!("{},{}\n", k, csv_escape(v)));
224        }
225        s
226    }
227
228    /// All export files as `(relative_filename, csv_contents)` pairs.
229    pub fn files(&self) -> Vec<(String, String)> {
230        vec![
231            ("fact_je_lines.csv".into(), self.fact_csv()),
232            ("dim_account.csv".into(), self.account_dim_csv()),
233            (
234                "dim_drcr.csv".into(),
235                Self::simple_dim_csv(&self.drcr, "drcr_key", "drcr"),
236            ),
237            (
238                "dim_source.csv".into(),
239                Self::simple_dim_csv(&self.sources, "source_key", "source"),
240            ),
241            (
242                "dim_preparer.csv".into(),
243                Self::simple_dim_csv(&self.preparers, "preparer_key", "preparer"),
244            ),
245            (
246                "dim_company.csv".into(),
247                Self::simple_dim_csv(&self.companies, "company_key", "company_code"),
248            ),
249            (
250                "dim_currency.csv".into(),
251                Self::simple_dim_csv(&self.currencies, "currency_key", "currency"),
252            ),
253            (
254                "dim_document_type.csv".into(),
255                Self::simple_dim_csv(&self.doc_types, "doc_type_key", "document_type"),
256            ),
257            (
258                "dim_date.csv".into(),
259                Self::simple_dim_csv(&self.dates, "date_key", "posting_date"),
260            ),
261        ]
262    }
263}
264
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::journal_entry::{JournalEntry, JournalEntryHeader, JournalEntryLine};
    use chrono::NaiveDate;
    use rust_decimal::Decimal;

    /// Builds a journal entry for `company` posted on 2026-03-15.
    ///
    /// Each tuple is `(gl_account, debit, credit)`: a non-zero debit produces a
    /// debit line for that amount, otherwise a credit line for the credit
    /// amount. Lines are numbered from 1 in the order given.
    fn je(company: &str, lines: Vec<(&str, i64, i64)>) -> JournalEntry {
        let posting_date = NaiveDate::from_ymd_opt(2026, 3, 15).unwrap();
        let header = JournalEntryHeader::new(company.to_string(), posting_date);
        let mut entry = JournalEntry::new(header);

        for (idx, (account, debit, credit)) in lines.into_iter().enumerate() {
            let doc_id = entry.header.document_id;
            let line_no = (idx + 1) as u32;
            let account = account.to_string();
            let line = match debit {
                0 => JournalEntryLine::credit(doc_id, line_no, account, Decimal::from(credit)),
                _ => JournalEntryLine::debit(doc_id, line_no, account, Decimal::from(debit)),
            };
            entry.add_line(line);
        }
        entry
    }

    #[test]
    fn fact_row_count_matches_lines_and_keys_resolve() {
        let first = je("1000", vec![("4000", 1000, 0), ("1100", 0, 1000)]);
        let second = je("2000", vec![("5000", 500, 0), ("2000", 0, 500)]);
        let export = DimensionalExport::from_entries(&[first, second]);

        assert_eq!(export.fact.len(), 4);
        assert_eq!(export.companies.len(), 2);
        assert_eq!(export.accounts.len(), 4);

        let known_account_keys: std::collections::BTreeSet<u32> =
            export.accounts.iter().map(|row| row.key).collect();
        for row in &export.fact {
            // Every fact account_key resolves to a COA row.
            assert!(known_account_keys.contains(&row.account_key));
            // drcr keys are 1/2.
            assert!(matches!(row.drcr_key, 1 | 2));
        }

        // 9 files emitted.
        assert_eq!(export.files().len(), 9);
    }

    #[test]
    fn keys_are_deterministic() {
        let entries = [je("1000", vec![("4000", 1000, 0), ("1100", 0, 1000)])];
        let first_run = DimensionalExport::from_entries(&entries);
        let second_run = DimensionalExport::from_entries(&entries);

        assert_eq!(first_run.fact_csv(), second_run.fact_csv());
        assert_eq!(first_run.account_dim_csv(), second_run.account_dim_csv());
    }

    #[test]
    fn signed_amounts_and_drcr_align() {
        let entries = [je("1000", vec![("4000", 1000, 0), ("1100", 0, 1000)])];
        let export = DimensionalExport::from_entries(&entries);

        // Amount of the first fact row carrying the given Debit/Credit key.
        let amount_for = |drcr: u32| {
            export
                .fact
                .iter()
                .find(|row| row.drcr_key == drcr)
                .map(|row| row.amount.as_str())
        };

        assert_eq!(amount_for(1), Some("1000"));
        assert_eq!(amount_for(2), Some("-1000"));
    }
}
331        let credit = ex.fact.iter().find(|f| f.drcr_key == 2).unwrap();
332        assert_eq!(debit.amount, "1000");
333        assert_eq!(credit.amount, "-1000");
334    }
335}