Skip to main content

config_disassembler/xml/parsers/
parse_unique_id.rs

//! Parse unique ID from XML element for file naming.
//!
//! ## Configuration syntax
//!
//! `unique_id_elements` is a comma-separated list of *candidates*; the first
//! candidate that fully resolves against an element wins. Each candidate is
//! either:
//!
//! * a single field name (e.g. `fullName`) - matches when that field is
//!   present anywhere in the element's subtree, or
//! * a `+`-joined **compound** of two or more field names (e.g.
//!   `actionName+pageOrSobjectType+formFactor`) - matches only when *every*
//!   sub-field resolves at the same level, in which case the resolved
//!   values are joined with [`COMPOUND_VALUE_SEPARATOR`] (`__`).
//!
//! Candidates are tried in order against an element's direct fields first;
//! the search descends into nested child elements only when no candidate
//! matches at the current level.
//!
//! Compounds let metadata types like `<profileActionOverrides>` - whose
//! natural unique key is `actionName + pageOrSobjectType + formFactor +
//! profile [+ recordType]` - produce stable, readable filenames instead of
//! collapsing every sibling into a SHA-256 fallback. Listing both the wide
//! and narrow forms (`A+B+C+D, A+B+C, A`) gives a graceful fallback chain
//! when an item only carries some of the keys.
//!
//! Backwards compatibility: any spec that contains no `+` is parsed as a
//! list of single-field candidates and behaves identically to releases
//! prior to compound-key support.
26
27use serde_json::Value;
28use sha2::{Digest, Sha256};
29
30use crate::xml::types::XmlElement;
31
/// Separator inserted between resolved values when a compound candidate
/// matches.
///
/// `__` was picked because it is safe in filenames on every filesystem and
/// because a single `_` is already common inside individual values, so
/// joining with one underscore would make the boundaries between values
/// ambiguous.
///
/// NOTE(review): Salesforce API names can themselves contain `__` (the
/// suffix in `Account_Name__c`, for example), so a joined ID is not
/// guaranteed to split back into its parts unambiguously - confirm that no
/// caller relies on reversing the join.
const COMPOUND_VALUE_SEPARATOR: &str = "__";
39
40/// Hash the full canonicalized JSON form of an element to derive an 8-char
41/// filename. SHA-256 over distinct content yields distinct prefixes with
42/// vanishingly small collision probability for normal sibling counts.
43fn create_short_hash(element: &XmlElement) -> String {
44    let stringified = serde_json::to_string(element).unwrap_or_default();
45    let mut hasher = Sha256::new();
46    hasher.update(stringified.as_bytes());
47    let result = hasher.finalize();
48    const HEX: &[u8; 16] = b"0123456789abcdef";
49    let mut s = String::with_capacity(8);
50    for b in result.iter().take(4) {
51        s.push(HEX[(b >> 4) as usize] as char);
52        s.push(HEX[(b & 0xf) as usize] as char);
53    }
54    s
55}
56
57/// True only for objects that have at least one element-name child. quick-xml
58/// represents leaf scalars (and attribute-only nodes) as `{ "#text": "..." }` /
59/// `{ "@attr": "...", "#text": "..." }`; those are *not* recursable - if we
60/// recurse into them we end up hashing the same single text-leaf child for
61/// every sibling that happens to start with the same scalar element, which
62/// silently collapses distinct siblings into one filename.
63fn is_recursable_object(value: &Value) -> bool {
64    let Some(obj) = value.as_object() else {
65        return false;
66    };
67    obj.iter()
68        .any(|(k, _)| !k.starts_with('#') && !k.starts_with('@'))
69}
70
71/// Extract string from a value - handles both direct strings and objects with #text (XML leaf elements).
72fn value_as_string(value: &Value) -> Option<String> {
73    if let Some(s) = value.as_str() {
74        return Some(s.to_string());
75    }
76    value
77        .as_object()
78        .and_then(|obj| obj.get("#text"))
79        .and_then(|v| v.as_str())
80        .map(|s| s.to_string())
81}
82
/// Parse the user-supplied spec into an ordered list of candidates, where
/// each candidate is itself a list of field names.
///
/// Candidates are separated by `,`; the fields of one candidate are
/// separated by `+`. A candidate with one field is a plain single-field
/// match (legacy behaviour); two or more fields form a compound.
///
/// Field names are trimmed, and empty entries (from leading/trailing commas,
/// double commas, or stray `+` separators) are dropped, so a copy-pasted spec
/// like `, name ,, +foo+ ,` degrades to `[["name"], ["foo"]]` instead of
/// producing empty lookups. A spec with no usable field yields an empty list.
///
/// The returned slices borrow from `spec`.
fn parse_candidates(spec: &str) -> Vec<Vec<&str>> {
    let mut candidates = Vec::new();
    for raw_candidate in spec.split(',') {
        let fields: Vec<&str> = raw_candidate
            .split('+')
            .map(str::trim)
            .filter(|field| !field.is_empty())
            .collect();
        // A candidate made only of separators/whitespace carries no fields.
        if !fields.is_empty() {
            candidates.push(fields);
        }
    }
    candidates
}
102
103/// Match a single candidate against the element's *direct* fields. A
104/// single-field candidate succeeds when the field is present and resolves
105/// to a non-empty string; a compound candidate succeeds only when every
106/// sub-field is present and non-empty, in which case the resolved values
107/// are joined with [`COMPOUND_VALUE_SEPARATOR`].
108///
109/// Restricting compounds to the same level keeps the semantics intuitive:
110/// `actionName+profile+recordType` describes a single record's shape, not
111/// a search for those tokens scattered across the subtree.
112fn match_candidate_at_direct(element: &XmlElement, fields: &[&str]) -> Option<String> {
113    let obj = element.as_object()?;
114    let mut parts: Vec<String> = Vec::with_capacity(fields.len());
115    for field in fields {
116        let value = obj.get(*field).and_then(value_as_string)?;
117        if value.is_empty() {
118            return None;
119        }
120        parts.push(value);
121    }
122    if parts.is_empty() {
123        return None;
124    }
125    Some(parts.join(COMPOUND_VALUE_SEPARATOR))
126}
127
128/// Search for a configured unique-id candidate anywhere in the subtree
129/// rooted at `element`. Returns `Some(id)` only when a candidate fully
130/// resolves; returns `None` so the caller can fall back to hashing the
131/// *outer* element rather than a single inner child.
132///
133/// Order of evaluation:
134/// 1. Try every candidate against the direct fields of `element` (so a
135///    direct match always beats a deeper one - preserves the priority that
136///    callers configuring `fullName,name` historically relied on).
137/// 2. If nothing matched, recurse into recursable children and repeat.
138fn find_id_in_subtree(element: &XmlElement, unique_id_elements: &str) -> Option<String> {
139    let candidates = parse_candidates(unique_id_elements);
140    if candidates.is_empty() {
141        return None;
142    }
143    for candidate in &candidates {
144        if let Some(id) = match_candidate_at_direct(element, candidate) {
145            return Some(id);
146        }
147    }
148    let obj = element.as_object()?;
149    for (_, child) in obj {
150        if !is_recursable_object(child) {
151            continue;
152        }
153        if let Some(found) = find_id_in_subtree(child, unique_id_elements) {
154            return Some(found);
155        }
156    }
157    None
158}
159
160/// Get a unique ID for an element, using configured fields or a hash of the
161/// *outer* element when no configured field exists in the subtree.
162///
163/// Hashing must be performed on the outer element (not on whatever inner
164/// child the search happened to visit first) so siblings whose first nested
165/// child shares a value - e.g. a list of `<actionOverrides>` that all start
166/// with `<actionName>View</actionName>` - still produce distinct filenames
167/// reflecting their distinct content.
168pub fn parse_unique_id_element(element: &XmlElement, unique_id_elements: Option<&str>) -> String {
169    if let Some(ids) = unique_id_elements {
170        find_id_in_subtree(element, ids).unwrap_or_else(|| create_short_hash(element))
171    } else {
172        create_short_hash(element)
173    }
174}
175
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// True when `id` has the shape `create_short_hash` produces: exactly
    /// eight lowercase hexadecimal characters. Checking the alphabet as well
    /// as the length keeps an 8-character resolved ID (e.g. `Get_Info`) from
    /// being mistaken for a hash fallback.
    fn is_short_hash(id: &str) -> bool {
        id.len() == 8
            && id
                .bytes()
                .all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'))
    }

    #[test]
    fn finds_direct_field() {
        let el = json!({ "name": "Get_Info", "label": "Get Info" });
        assert_eq!(parse_unique_id_element(&el, Some("name")), "Get_Info");
    }

    #[test]
    fn finds_deeply_nested_field() {
        // Only `value` contains elementReference; `connector` is a recursable
        // sibling that must be searched without producing a match.
        let el = json!({
            "value": { "elementReference": "accts.accounts" },
            "connector": { "targetReference": "X" }
        });
        assert_eq!(
            parse_unique_id_element(&el, Some("elementReference")),
            "accts.accounts"
        );
    }

    #[test]
    fn finds_id_in_grandchild() {
        let el = json!({
            "wrapper": {
                "inner": { "name": "NestedName" }
            }
        });
        assert_eq!(parse_unique_id_element(&el, Some("name")), "NestedName");
    }

    #[test]
    fn value_as_string_returns_none_for_non_string_non_text_objects() {
        // Directly named field exists but value is neither a string nor an object with #text.
        // Exercises the None-return path inside value_as_string plus the "no match, move on"
        // path inside match_candidate_at_direct.
        let el = json!({ "name": { "other": "xxx" } });
        let id = parse_unique_id_element(&el, Some("name"));
        // Falls through to the 8-char short-hash fallback.
        assert!(is_short_hash(&id), "expected a short hash, got {id}");
    }

    #[test]
    fn falls_back_to_hash_when_no_match_and_no_nested_object() {
        // No direct match and no nested object match → hash fallback.
        let el = json!({ "a": "string", "b": "another" });
        let id = parse_unique_id_element(&el, Some("name"));
        assert!(is_short_hash(&id), "expected a short hash, got {id}");
    }

    #[test]
    fn hash_fallback_when_unique_id_elements_is_none() {
        let el = json!({ "a": "b" });
        let id = parse_unique_id_element(&el, None);
        assert!(is_short_hash(&id), "expected a short hash, got {id}");
    }

    #[test]
    fn non_object_element_returns_hash() {
        let el = json!("just-a-string");
        let id = parse_unique_id_element(&el, Some("name"));
        assert!(is_short_hash(&id), "expected a short hash, got {id}");
    }

    #[test]
    fn finds_name_from_text_object() {
        // XML parser stores leaf elements as { "#text": "value" }
        let el = json!({
            "name": { "#text": "Get_Info" },
            "label": { "#text": "Get Info" },
            "actionName": { "#text": "GetFirstFromCollection" }
        });
        assert_eq!(parse_unique_id_element(&el, Some("name")), "Get_Info");
        assert_eq!(
            parse_unique_id_element(&el, Some("actionName")),
            "GetFirstFromCollection"
        );
    }

    // ---- regression: text-leaf siblings must NOT collapse to one hash ------

    /// Models a `<CustomApplication>`'s `<actionOverrides>`: every block has
    /// the same `<actionName>View</actionName>` first child but distinct
    /// `<content>` and `<pageOrSobjectType>` payloads. With the old
    /// implementation the recursion landed on `{"#text":"View"}` for every
    /// sibling and they all hashed to the same 8-char prefix, silently
    /// collapsing 100s of overrides into a single shard that contained only
    /// the last one written.
    #[test]
    fn distinct_siblings_with_shared_first_text_leaf_get_distinct_hashes() {
        let make_action_override = |i: u32| -> XmlElement {
            json!({
                "actionName": { "#text": "View" },
                "comment": { "#text": format!("Action override {i}") },
                "content": { "#text": format!("Sample_Page_{i:05}") },
                "formFactor": { "#text": "Large" },
                "skipRecordTypeSelect": { "#text": "false" },
                "type": { "#text": "Flexipage" },
                "pageOrSobjectType": { "#text": format!("Sample_Object_{i:03}__c") }
            })
        };

        // Default unique-id elements ("fullName,name") - none of these are
        // present on actionOverride children.
        let ids = Some("fullName,name");

        let mut seen = std::collections::HashSet::new();
        for i in 1..=128 {
            let id = parse_unique_id_element(&make_action_override(i), ids);
            assert!(
                is_short_hash(&id),
                "expected an 8-char short hash, got {id}"
            );
            assert!(
                seen.insert(id.clone()),
                "duplicate hash {id} for actionOverride {i} - distinct siblings collapsed"
            );
        }
    }

    /// Same shape but with no unique-id config at all: must also produce
    /// distinct hashes per sibling.
    #[test]
    fn distinct_siblings_get_distinct_hashes_with_no_unique_id_config() {
        let mut seen = std::collections::HashSet::new();
        for i in 1..=64 {
            let el = json!({
                "actionName": { "#text": "View" },
                "content": { "#text": format!("Page_{i}") }
            });
            let id = parse_unique_id_element(&el, None);
            assert!(
                seen.insert(id.clone()),
                "duplicate hash {id} at index {i} with no unique-id config"
            );
        }
    }

    /// `find_id_in_subtree` must skip text-leaf wrappers like
    /// `{"#text": "..."}` rather than treat them as recursable objects.
    /// Otherwise the search returns a hash of the inner wrapper rather than
    /// hashing the outer element.
    #[test]
    fn text_leaf_wrappers_are_not_recursable() {
        let leaf = json!({ "#text": "View" });
        assert!(!is_recursable_object(&leaf));

        let attrs_only = json!({ "@attr": "x", "#text": "y" });
        assert!(!is_recursable_object(&attrs_only));

        let real = json!({ "name": "x" });
        assert!(is_recursable_object(&real));

        let mixed = json!({ "@attr": "x", "name": "y" });
        assert!(is_recursable_object(&mixed));
    }

    // ---- compound-key support ----------------------------------------------

    /// A `<profileActionOverrides>` element with the full key set. The
    /// compound `actionName+pageOrSobjectType+formFactor+profile` must
    /// resolve to all four values joined with `__`.
    #[test]
    fn compound_resolves_when_all_fields_present() {
        let el = json!({
            "actionName": { "#text": "Tab" },
            "content": { "#text": "Home_Page_Default" },
            "formFactor": { "#text": "Large" },
            "pageOrSobjectType": { "#text": "standard-home" },
            "type": { "#text": "Flexipage" },
            "profile": { "#text": "Implementation_Lightning" }
        });
        let id =
            parse_unique_id_element(&el, Some("actionName+pageOrSobjectType+formFactor+profile"));
        assert_eq!(id, "Tab__standard-home__Large__Implementation_Lightning");
    }

    /// A compound that names a field the element doesn't have must NOT
    /// match - the next candidate (a narrower compound, then a single
    /// field) takes over.
    #[test]
    fn compound_falls_through_when_one_field_missing() {
        // `<actionOverrides>` (no profile, no recordType) - the wide compound
        // must fail, the narrow compound must succeed.
        let el = json!({
            "actionName": { "#text": "View" },
            "content": { "#text": "LUX_Case_Release_Candidate_Copy" },
            "formFactor": { "#text": "Large" },
            "pageOrSobjectType": { "#text": "Case" },
            "type": { "#text": "Flexipage" }
        });
        let spec = "actionName+pageOrSobjectType+formFactor+profile,actionName+pageOrSobjectType+formFactor,actionName";
        assert_eq!(
            parse_unique_id_element(&el, Some(spec)),
            "View__Case__Large"
        );
    }

    /// All compound candidates miss → the loop must fall back to the
    /// single-field candidate at the tail of the spec, and ultimately to
    /// the outer-element hash if even that misses.
    #[test]
    fn compound_then_single_then_hash_fallback() {
        let el = json!({
            "actionName": { "#text": "View" }
        });
        let spec_all_compound =
            "actionName+pageOrSobjectType+formFactor+profile,actionName+pageOrSobjectType";
        let id = parse_unique_id_element(&el, Some(spec_all_compound));
        assert!(
            is_short_hash(&id),
            "no candidate should match → hash fallback, got {id}"
        );

        let spec_with_single_tail = "actionName+pageOrSobjectType+formFactor,actionName";
        assert_eq!(
            parse_unique_id_element(&el, Some(spec_with_single_tail)),
            "View"
        );
    }

    /// Empty values (`<recordType></recordType>`) must be treated as
    /// missing for the purpose of compound matching - otherwise we would
    /// emit filenames like `View__Account__Large__` with a trailing
    /// separator and silently collide with siblings that genuinely lack
    /// the field.
    #[test]
    fn compound_treats_empty_values_as_missing() {
        let el = json!({
            "actionName": { "#text": "View" },
            "pageOrSobjectType": { "#text": "Account" },
            "recordType": { "#text": "" }  // explicitly empty
        });
        let spec = "actionName+pageOrSobjectType+recordType,actionName+pageOrSobjectType";
        assert_eq!(
            parse_unique_id_element(&el, Some(spec)),
            "View__Account",
            "empty <recordType> must be treated as missing"
        );
    }

    /// Distinct profileActionOverrides siblings sharing actionName +
    /// pageOrSobjectType + formFactor but differing in `profile` must
    /// produce distinct compound IDs (not collide).
    #[test]
    fn compound_disambiguates_siblings_that_share_outer_fields() {
        let make = |profile: &str| {
            json!({
                "actionName": { "#text": "Tab" },
                "content": { "#text": "Home_Page_Default" },
                "formFactor": { "#text": "Large" },
                "pageOrSobjectType": { "#text": "standard-home" },
                "type": { "#text": "Flexipage" },
                "profile": { "#text": profile }
            })
        };
        let spec = "actionName+pageOrSobjectType+formFactor+profile";
        let a = parse_unique_id_element(&make("Implementation_Lightning"), Some(spec));
        let b = parse_unique_id_element(&make("Sales_Lightning"), Some(spec));
        assert_ne!(a, b);
        assert!(a.ends_with("Implementation_Lightning"));
        assert!(b.ends_with("Sales_Lightning"));
    }

    /// A single-field spec must behave identically to releases prior to
    /// compound-key support: same priority (direct first, then nested),
    /// same hash fallback, no spurious `__` separators.
    #[test]
    fn single_field_behaviour_is_unchanged() {
        let el = json!({ "name": "Get_Info", "label": "Get Info" });
        assert_eq!(parse_unique_id_element(&el, Some("name")), "Get_Info");

        // A nested field is still found when there is no direct one.
        let nested = json!({
            "wrapper": { "name": "NestedName" }
        });
        assert_eq!(parse_unique_id_element(&nested, Some("name")), "NestedName");

        // Direct vs nested priority preserved: when both exist, the direct
        // field wins regardless of where the nested one sits.
        let both = json!({
            "wrapper": { "name": "NestedName" },
            "name": "DirectName"
        });
        assert_eq!(parse_unique_id_element(&both, Some("name")), "DirectName");
    }

    /// Pathological/malformed specs - leading commas, stray `+`, all
    /// whitespace - must not panic and must degrade to hash fallback.
    #[test]
    fn malformed_spec_degrades_to_hash() {
        let el = json!({ "foo": "bar" });
        let id = parse_unique_id_element(&el, Some(",,+,, "));
        assert!(
            is_short_hash(&id),
            "all-empty candidates → hash fallback, got {id}"
        );
    }

    /// Recursion must only return when a configured unique-id field is
    /// *actually* found, not when a recursive call falls back to its own
    /// hash. The hash is computed exactly once, at the top level, on the
    /// outer element.
    #[test]
    fn nested_search_does_not_return_inner_hash() {
        // Two distinct outer elements whose first recursable child has the
        // same shape. With the old behavior the recursion would compute a
        // hash of that inner child for both - same hash for distinct outers.
        // With the fix, each outer is hashed in full and they differ.
        let a = json!({
            "wrapper": { "leafA": "shared", "extraA": "different-A" },
            "outerA": "A"
        });
        let b = json!({
            "wrapper": { "leafA": "shared", "extraA": "different-A" },
            "outerB": "B"
        });
        let id_a = parse_unique_id_element(&a, Some("name"));
        let id_b = parse_unique_id_element(&b, Some("name"));
        assert_ne!(id_a, id_b);
    }
}