Skip to main content

config_disassembler/xml/parsers/
parse_unique_id.rs

1//! Parse unique ID from XML element for file naming.
2
3use serde_json::Value;
4use sha2::{Digest, Sha256};
5
6use crate::xml::types::XmlElement;
7
8/// Hash the full canonicalized JSON form of an element to derive an 8-char
9/// filename. SHA-256 over distinct content yields distinct prefixes with
10/// vanishingly small collision probability for normal sibling counts.
11fn create_short_hash(element: &XmlElement) -> String {
12    let stringified = serde_json::to_string(element).unwrap_or_default();
13    let mut hasher = Sha256::new();
14    hasher.update(stringified.as_bytes());
15    let result = hasher.finalize();
16    const HEX: &[u8; 16] = b"0123456789abcdef";
17    let mut s = String::with_capacity(8);
18    for b in result.iter().take(4) {
19        s.push(HEX[(b >> 4) as usize] as char);
20        s.push(HEX[(b & 0xf) as usize] as char);
21    }
22    s
23}
24
25/// True only for objects that have at least one element-name child. quick-xml
26/// represents leaf scalars (and attribute-only nodes) as `{ "#text": "..." }` /
27/// `{ "@attr": "...", "#text": "..." }`; those are *not* recursable - if we
28/// recurse into them we end up hashing the same single text-leaf child for
29/// every sibling that happens to start with the same scalar element, which
30/// silently collapses distinct siblings into one filename.
31fn is_recursable_object(value: &Value) -> bool {
32    let Some(obj) = value.as_object() else {
33        return false;
34    };
35    obj.iter()
36        .any(|(k, _)| !k.starts_with('#') && !k.starts_with('@'))
37}
38
39/// Extract string from a value - handles both direct strings and objects with #text (XML leaf elements).
40fn value_as_string(value: &Value) -> Option<String> {
41    if let Some(s) = value.as_str() {
42        return Some(s.to_string());
43    }
44    value
45        .as_object()
46        .and_then(|obj| obj.get("#text"))
47        .and_then(|v| v.as_str())
48        .map(|s| s.to_string())
49}
50
51fn find_direct_field_match(element: &XmlElement, field_names: &[&str]) -> Option<String> {
52    let obj = element.as_object()?;
53    for name in field_names {
54        if let Some(value) = obj.get(*name) {
55            if let Some(s) = value_as_string(value) {
56                return Some(s);
57            }
58        }
59    }
60    None
61}
62
63/// Search for a configured unique-id field anywhere in the subtree rooted at
64/// `element`. Returns `Some(id)` only when a configured field is *actually*
65/// matched; returns `None` when nothing matches so the caller can fall back to
66/// hashing the *outer* element rather than a single inner child.
67fn find_id_in_subtree(element: &XmlElement, unique_id_elements: &str) -> Option<String> {
68    let field_names: Vec<&str> = unique_id_elements.split(',').map(|s| s.trim()).collect();
69    if let Some(direct) = find_direct_field_match(element, &field_names) {
70        return Some(direct);
71    }
72    let obj = element.as_object()?;
73    for (_, child) in obj {
74        if !is_recursable_object(child) {
75            continue;
76        }
77        if let Some(found) = find_id_in_subtree(child, unique_id_elements) {
78            return Some(found);
79        }
80    }
81    None
82}
83
84/// Get a unique ID for an element, using configured fields or a hash of the
85/// *outer* element when no configured field exists in the subtree.
86///
87/// Hashing must be performed on the outer element (not on whatever inner
88/// child the search happened to visit first) so siblings whose first nested
89/// child shares a value - e.g. a list of `<actionOverrides>` that all start
90/// with `<actionName>View</actionName>` - still produce distinct filenames
91/// reflecting their distinct content.
92pub fn parse_unique_id_element(element: &XmlElement, unique_id_elements: Option<&str>) -> String {
93    if let Some(ids) = unique_id_elements {
94        find_id_in_subtree(element, ids).unwrap_or_else(|| create_short_hash(element))
95    } else {
96        create_short_hash(element)
97    }
98}
99
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Assert that `id` has the shape of the short-hash fallback: exactly
    /// eight lowercase hexadecimal characters. A bare length check is not
    /// enough, because a matched id can also be eight characters long
    /// (e.g. `Get_Info`).
    fn assert_short_hash(id: &str) {
        assert_eq!(id.len(), 8, "expected an 8-char short hash, got {id}");
        assert!(
            id.chars().all(|c| matches!(c, '0'..='9' | 'a'..='f')),
            "expected lowercase hex short hash, got {id}"
        );
    }

    #[test]
    fn finds_direct_field() {
        let el = json!({ "name": "Get_Info", "label": "Get Info" });
        assert_eq!(parse_unique_id_element(&el, Some("name")), "Get_Info");
    }

    #[test]
    fn finds_deeply_nested_field() {
        // value before connector so we find elementReference (matches TS iteration order)
        let el = json!({
            "value": { "elementReference": "accts.accounts" },
            "connector": { "targetReference": "X" }
        });
        assert_eq!(
            parse_unique_id_element(&el, Some("elementReference")),
            "accts.accounts"
        );
    }

    #[test]
    fn finds_id_in_grandchild() {
        let el = json!({
            "wrapper": {
                "inner": { "name": "NestedName" }
            }
        });
        assert_eq!(parse_unique_id_element(&el, Some("name")), "NestedName");
    }

    #[test]
    fn value_as_string_returns_none_for_non_string_non_text_objects() {
        // Directly named field exists but value is neither a string nor an object with #text.
        // Exercises the None-return path inside value_as_string plus the "no match, move on"
        // path inside find_direct_field_match.
        let el = json!({ "name": { "other": "xxx" } });
        let id = parse_unique_id_element(&el, Some("name"));
        // Falls through to the 8-char short-hash fallback.
        assert_short_hash(&id);
    }

    #[test]
    fn falls_back_to_hash_when_no_match_and_no_nested_object() {
        // No direct match and no nested object match -> hash fallback.
        let el = json!({ "a": "string", "b": "another" });
        let id = parse_unique_id_element(&el, Some("name"));
        assert_short_hash(&id);
    }

    #[test]
    fn hash_fallback_when_unique_id_elements_is_none() {
        let el = json!({ "a": "b" });
        let id = parse_unique_id_element(&el, None);
        assert_short_hash(&id);
    }

    #[test]
    fn non_object_element_returns_hash() {
        let el = json!("just-a-string");
        let id = parse_unique_id_element(&el, Some("name"));
        assert_short_hash(&id);
    }

    #[test]
    fn finds_name_from_text_object() {
        // XML parser stores leaf elements as { "#text": "value" }
        let el = json!({
            "name": { "#text": "Get_Info" },
            "label": { "#text": "Get Info" },
            "actionName": { "#text": "GetFirstFromCollection" }
        });
        assert_eq!(parse_unique_id_element(&el, Some("name")), "Get_Info");
        assert_eq!(
            parse_unique_id_element(&el, Some("actionName")),
            "GetFirstFromCollection"
        );
    }

    // ---- regression: text-leaf siblings must NOT collapse to one hash ------

    /// Models a `<CustomApplication>`'s `<actionOverrides>`: every block has
    /// the same `<actionName>View</actionName>` first child but distinct
    /// `<content>` and `<pageOrSobjectType>` payloads. With the old
    /// implementation the recursion landed on `{"#text":"View"}` for every
    /// sibling and they all hashed to the same 8-char prefix, silently
    /// collapsing 100s of overrides into a single shard that contained only
    /// the last one written.
    #[test]
    fn distinct_siblings_with_shared_first_text_leaf_get_distinct_hashes() {
        let make_action_override = |i: u32| -> XmlElement {
            json!({
                "actionName": { "#text": "View" },
                "comment": { "#text": format!("Action override {i}") },
                "content": { "#text": format!("Sample_Page_{i:05}") },
                "formFactor": { "#text": "Large" },
                "skipRecordTypeSelect": { "#text": "false" },
                "type": { "#text": "Flexipage" },
                "pageOrSobjectType": { "#text": format!("Sample_Object_{i:03}__c") }
            })
        };

        // Default unique-id elements ("fullName,name") - none of these are
        // present on actionOverride children.
        let ids = Some("fullName,name");

        let mut seen = std::collections::HashSet::new();
        for i in 1..=128 {
            let id = parse_unique_id_element(&make_action_override(i), ids);
            assert_short_hash(&id);
            assert!(
                seen.insert(id.clone()),
                "duplicate hash {id} for actionOverride {i} - distinct siblings collapsed"
            );
        }
    }

    /// Same shape but with no unique-id config at all: must also produce
    /// distinct hashes per sibling.
    #[test]
    fn distinct_siblings_get_distinct_hashes_with_no_unique_id_config() {
        let mut seen = std::collections::HashSet::new();
        for i in 1..=64 {
            let el = json!({
                "actionName": { "#text": "View" },
                "content": { "#text": format!("Page_{i}") }
            });
            let id = parse_unique_id_element(&el, None);
            assert_short_hash(&id);
            assert!(
                seen.insert(id.clone()),
                "duplicate hash {id} at index {i} with no unique-id config"
            );
        }
    }

    /// `find_id_in_subtree` must skip text-leaf wrappers like
    /// `{"#text": "..."}` rather than treat them as recursable objects.
    /// Otherwise the search returns a hash of the inner wrapper rather than
    /// hashing the outer element.
    #[test]
    fn text_leaf_wrappers_are_not_recursable() {
        let leaf = json!({ "#text": "View" });
        assert!(!is_recursable_object(&leaf));

        let attrs_only = json!({ "@attr": "x", "#text": "y" });
        assert!(!is_recursable_object(&attrs_only));

        let real = json!({ "name": "x" });
        assert!(is_recursable_object(&real));

        let mixed = json!({ "@attr": "x", "name": "y" });
        assert!(is_recursable_object(&mixed));
    }

    /// Recursion must only return when a configured unique-id field is
    /// *actually* found, not when a recursive call falls back to its own
    /// hash. The hash is computed exactly once, at the top level, on the
    /// outer element.
    #[test]
    fn nested_search_does_not_return_inner_hash() {
        // Two distinct outer elements whose first recursable child has the
        // same shape. With the old behavior the recursion would compute a
        // hash of that inner child for both - same hash for distinct outers.
        // With the fix, each outer is hashed in full and they differ.
        let a = json!({
            "wrapper": { "leafA": "shared", "extraA": "different-A" },
            "outerA": "A"
        });
        let b = json!({
            "wrapper": { "leafA": "shared", "extraA": "different-A" },
            "outerB": "B"
        });
        let id_a = parse_unique_id_element(&a, Some("name"));
        let id_b = parse_unique_id_element(&b, Some("name"));
        assert_short_hash(&id_a);
        assert_short_hash(&id_b);
        assert_ne!(id_a, id_b);
    }
}