Skip to main content

datasynth_group/
validate.rs

1//! Structural validation of a parsed [`GroupConfig`].
2//!
3//! Call [`validate`] immediately after deserialisation and before any resolution
4//! or manifest-building steps. All detected problems are collected into a single
5//! `GroupError::Config` so the caller sees every issue at once.
6
7use crate::config::{GroupConfig, IcRelationshipConfig};
8use crate::errors::{GroupError, GroupResult};
9
10/// Known named fields on [`crate::config::EntityConfig`] used for Levenshtein
11/// typo detection against keys that land in `overrides`.
///
/// `find_closest_field` measures its `key` argument against every entry in
/// this list; `validate` feeds it each key found in an entity's `overrides`.
// NOTE(review): nothing visible here ties this list to the struct definition —
// confirm it is updated whenever `EntityConfig` gains or renames a field.
12const ENTITY_CONFIG_FIELDS: &[&str] = &[
13    "code",
14    "name",
15    "country",
16    "functional_currency",
17    "scoping_profile",
18    "consolidation_method",
19    "ownership_percent",
20    "parent_code",
21    "acquisition_date",
22    "accounting_framework",
23    "industry",
24    "rows",
25];
26
27/// Maximum Levenshtein distance considered a "likely typo".
///
/// The bound is inclusive: `find_closest_field` accepts a candidate whose
/// distance is at most this value, and separately rejects a distance of zero.
28const TYPO_DISTANCE_THRESHOLD: usize = 2;
29
30/// Validate the structural consistency of `cfg`.
31///
32/// Collects **all** errors before returning so the caller sees the complete
33/// picture rather than having to fix and re-run for each problem in turn.
34///
35/// Returns `Ok(())` when no problems are found.
36pub fn validate(cfg: &GroupConfig) -> GroupResult<()> {
37    let mut errors: Vec<String> = Vec::new();
38
39    let entity_codes: std::collections::BTreeSet<&str> = cfg
40        .ownership
41        .entities
42        .iter()
43        .map(|e| e.code.as_str())
44        .collect();
45
46    // -----------------------------------------------------------------------
47    // Check 1: parent_entity_code must appear in ownership.entities
48    // -----------------------------------------------------------------------
49    let parent = cfg.ownership.parent_entity_code.as_str();
50    if !entity_codes.contains(parent) {
51        errors.push(format!(
52            "parent_entity_code {parent} is not present in ownership.entities"
53        ));
54    }
55
56    // -----------------------------------------------------------------------
57    // Check 2 + 3 + 4 + 8: per-entity checks
58    // -----------------------------------------------------------------------
59    for entity in &cfg.ownership.entities {
60        let code = entity.code.as_str();
61
62        // Check 2: scoping_profile must exist in scoping_profiles
63        if !cfg.scoping_profiles.contains_key(&entity.scoping_profile) {
64            errors.push(format!(
65                "entity {code} references unknown scoping_profile {}",
66                entity.scoping_profile
67            ));
68        }
69
70        // Check 3: parent_code (if Some) must exist in ownership.entities
71        if let Some(ref pc) = entity.parent_code {
72            if !entity_codes.contains(pc.as_str()) {
73                errors.push(format!(
74                    "entity {code} has parent_code {pc} which is not in ownership.entities"
75                ));
76            }
77        }
78
79        // Check 4: ownership_percent must be in [0.0, 1.0]
80        if let Some(pct) = entity.ownership_percent {
81            if pct < rust_decimal::Decimal::ZERO || pct > rust_decimal::Decimal::ONE {
82                errors.push(format!(
83                    "entity {code} has ownership_percent {pct} outside [0.0, 1.0]"
84                ));
85            }
86        }
87
88        // Check 8 (I2): typo detection on entity.overrides keys
89        for key in entity.overrides.keys() {
90            if let Some(suggestion) = find_closest_field(key) {
91                errors.push(format!(
92                    "entity {code}: possible typo in field '{key}' — did you mean '{suggestion}'?"
93                ));
94            }
95        }
96    }
97
98    // -----------------------------------------------------------------------
99    // Check 5 + 6: IC relationship validation
100    // -----------------------------------------------------------------------
101    for rel in &cfg.intercompany.relationships {
102        match rel {
103            IcRelationshipConfig::Explicit(ex) => {
104                // Check 5a: seller must be in entities
105                if !entity_codes.contains(ex.seller.as_str()) {
106                    errors.push(format!(
107                        "IC relationship references unknown entity {}",
108                        ex.seller
109                    ));
110                }
111                // Check 5b: buyer must be in entities
112                if !entity_codes.contains(ex.buyer.as_str()) {
113                    errors.push(format!(
114                        "IC relationship references unknown entity {}",
115                        ex.buyer
116                    ));
117                }
118                // Check 5c: self-pair
119                if ex.seller == ex.buyer {
120                    errors.push(format!(
121                        "IC relationship has seller == buyer ({})",
122                        ex.seller
123                    ));
124                }
125            }
126            IcRelationshipConfig::Pattern(pat) => {
127                let p = &pat.pattern;
128
129                // Check 6a: pattern.seller (if Some) must be in entities
130                if let Some(ref seller) = p.seller {
131                    if !entity_codes.contains(seller.as_str()) {
132                        errors.push(format!(
133                            "IC pattern references unknown entity/profile {seller}"
134                        ));
135                    }
136                }
137                // Check 6b: pattern.buyer (if Some) must be in entities
138                if let Some(ref buyer) = p.buyer {
139                    if !entity_codes.contains(buyer.as_str()) {
140                        errors.push(format!(
141                            "IC pattern references unknown entity/profile {buyer}"
142                        ));
143                    }
144                }
145                // Check 6c: seller_scoping_profile (if Some and not "any") must exist
146                if let Some(ref sp) = p.seller_scoping_profile {
147                    if sp != "any" && !cfg.scoping_profiles.contains_key(sp) {
148                        errors.push(format!("IC pattern references unknown entity/profile {sp}"));
149                    }
150                }
151                // Check 6d: buyer_scoping_profile (if Some and not "any") must exist
152                if let Some(ref sp) = p.buyer_scoping_profile {
153                    if sp != "any" && !cfg.scoping_profiles.contains_key(sp) {
154                        errors.push(format!("IC pattern references unknown entity/profile {sp}"));
155                    }
156                }
157            }
158        }
159    }
160
161    if errors.is_empty() {
162        Ok(())
163    } else {
164        Err(GroupError::Config(errors.join("\n")))
165    }
166}
167
168/// Return the closest known `EntityConfig` field name if `key` is within
169/// [`TYPO_DISTANCE_THRESHOLD`] of one, otherwise `None`.
170fn find_closest_field(key: &str) -> Option<&'static str> {
171    let mut best_dist = usize::MAX;
172    let mut best_field = None;
173    for &field in ENTITY_CONFIG_FIELDS {
174        let d = strsim::levenshtein(key, field);
175        if d < best_dist {
176            best_dist = d;
177            best_field = Some(field);
178        }
179    }
180    if best_dist <= TYPO_DISTANCE_THRESHOLD && best_dist > 0 {
181        best_field
182    } else {
183        None
184    }
185}