config_disassembler/xml/parsers/parse_unique_id.rs
1//! Parse unique ID from XML element for file naming.
2//!
3//! ## Configuration syntax
4//!
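//! `unique_id_elements` is a comma-separated list of *candidates*. Candidates
//! are tried in order against an element's direct fields first; only when none
//! of them resolves there does the search descend into nested child elements,
//! so a direct match always beats a deeper one. Each candidate is either: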
8//!
9//! * a single field name (e.g. `fullName`) - matches when that field is
10//! present anywhere in the element's subtree, or
11//! * a `+`-joined **compound** of two or more field names (e.g.
12//! `actionName+pageOrSobjectType+formFactor`) - matches only when *every*
13//! sub-field resolves at the same level, in which case the resolved
14//! values are joined with [`COMPOUND_VALUE_SEPARATOR`] (`__`).
15//!
16//! Compounds let metadata types like `<profileActionOverrides>` - whose
17//! natural unique key is `actionName + pageOrSobjectType + formFactor +
18//! profile [+ recordType]` - produce stable, readable filenames instead of
19//! collapsing every sibling into a SHA-256 fallback. Listing both the wide
20//! and narrow forms (`A+B+C+D, A+B+C, A`) gives a graceful fallback chain
21//! when an item only carries some of the keys.
22//!
23//! Backwards compatibility: any spec that contains no `+` is parsed as a
24//! list of single-field candidates and behaves identically to releases
25//! prior to compound-key support.
26
27use serde_json::Value;
28use sha2::{Digest, Sha256};
29
30use crate::xml::types::XmlElement;
31
/// Separator inserted between resolved values when a compound candidate
/// matches. Picked because it is safe in filenames on every filesystem and
/// because a double underscore is much rarer inside Salesforce identifier
/// names than a single one (single `_` is common - e.g. `Account_Name__c` -
/// so a single-underscore separator would round-trip ambiguously whenever
/// the values themselves already contain `_`).
38const COMPOUND_VALUE_SEPARATOR: &str = "__";
39
40/// Hash the full canonicalized JSON form of an element to derive an 8-char
41/// filename. SHA-256 over distinct content yields distinct prefixes with
42/// vanishingly small collision probability for normal sibling counts.
43fn create_short_hash(element: &XmlElement) -> String {
44 let stringified = serde_json::to_string(element).unwrap_or_default();
45 let mut hasher = Sha256::new();
46 hasher.update(stringified.as_bytes());
47 let result = hasher.finalize();
48 const HEX: &[u8; 16] = b"0123456789abcdef";
49 let mut s = String::with_capacity(8);
50 for b in result.iter().take(4) {
51 s.push(HEX[(b >> 4) as usize] as char);
52 s.push(HEX[(b & 0xf) as usize] as char);
53 }
54 s
55}
56
57/// True only for objects that have at least one element-name child. quick-xml
58/// represents leaf scalars (and attribute-only nodes) as `{ "#text": "..." }` /
59/// `{ "@attr": "...", "#text": "..." }`; those are *not* recursable - if we
60/// recurse into them we end up hashing the same single text-leaf child for
61/// every sibling that happens to start with the same scalar element, which
62/// silently collapses distinct siblings into one filename.
63fn is_recursable_object(value: &Value) -> bool {
64 let Some(obj) = value.as_object() else {
65 return false;
66 };
67 obj.iter()
68 .any(|(k, _)| !k.starts_with('#') && !k.starts_with('@'))
69}
70
71/// Extract string from a value - handles both direct strings and objects with #text (XML leaf elements).
72fn value_as_string(value: &Value) -> Option<String> {
73 if let Some(s) = value.as_str() {
74 return Some(s.to_string());
75 }
76 value
77 .as_object()
78 .and_then(|obj| obj.get("#text"))
79 .and_then(|v| v.as_str())
80 .map(|s| s.to_string())
81}
82
/// Split the user-supplied spec into candidates, each a list of field names.
///
/// Candidates are separated by `,`; the fields of one candidate are separated
/// by `+`. A one-field candidate is a plain single-field match (legacy
/// behaviour); two or more fields form a compound.
///
/// Field names are trimmed, and empty pieces (from leading/trailing commas,
/// double commas, or stray `+` separators) are dropped, so a copy-pasted spec
/// like `, name ,, +foo+ ,` degrades to `[["name"], ["foo"]]` instead of
/// producing empty lookups.
fn parse_candidates(spec: &str) -> Vec<Vec<&str>> {
    let mut candidates = Vec::new();
    for raw_candidate in spec.split(',') {
        let field_names: Vec<&str> = raw_candidate
            .split('+')
            .map(str::trim)
            .filter(|name| !name.is_empty())
            .collect();
        // A candidate made up solely of separators/whitespace carries no
        // information and is skipped entirely.
        if !field_names.is_empty() {
            candidates.push(field_names);
        }
    }
    candidates
}
102
103/// Match a single candidate against the element's *direct* fields. A
104/// single-field candidate succeeds when the field is present and resolves
105/// to a non-empty string; a compound candidate succeeds only when every
106/// sub-field is present and non-empty, in which case the resolved values
107/// are joined with [`COMPOUND_VALUE_SEPARATOR`].
108///
109/// Restricting compounds to the same level keeps the semantics intuitive:
110/// `actionName+profile+recordType` describes a single record's shape, not
111/// a search for those tokens scattered across the subtree.
112fn match_candidate_at_direct(element: &XmlElement, fields: &[&str]) -> Option<String> {
113 let obj = element.as_object()?;
114 let mut parts: Vec<String> = Vec::with_capacity(fields.len());
115 for field in fields {
116 let value = obj.get(*field).and_then(value_as_string)?;
117 if value.is_empty() {
118 return None;
119 }
120 parts.push(value);
121 }
122 if parts.is_empty() {
123 return None;
124 }
125 Some(parts.join(COMPOUND_VALUE_SEPARATOR))
126}
127
128/// Search for a configured unique-id candidate anywhere in the subtree
129/// rooted at `element`. Returns `Some(id)` only when a candidate fully
130/// resolves; returns `None` so the caller can fall back to hashing the
131/// *outer* element rather than a single inner child.
132///
133/// Order of evaluation:
134/// 1. Try every candidate against the direct fields of `element` (so a
135/// direct match always beats a deeper one - preserves the priority that
136/// callers configuring `fullName,name` historically relied on).
137/// 2. If nothing matched, recurse into recursable children and repeat.
138fn find_id_in_subtree(element: &XmlElement, unique_id_elements: &str) -> Option<String> {
139 let candidates = parse_candidates(unique_id_elements);
140 if candidates.is_empty() {
141 return None;
142 }
143 for candidate in &candidates {
144 if let Some(id) = match_candidate_at_direct(element, candidate) {
145 return Some(id);
146 }
147 }
148 let obj = element.as_object()?;
149 for (_, child) in obj {
150 if !is_recursable_object(child) {
151 continue;
152 }
153 if let Some(found) = find_id_in_subtree(child, unique_id_elements) {
154 return Some(found);
155 }
156 }
157 None
158}
159
160/// Get a unique ID for an element, using configured fields or a hash of the
161/// *outer* element when no configured field exists in the subtree.
162///
163/// Hashing must be performed on the outer element (not on whatever inner
164/// child the search happened to visit first) so siblings whose first nested
165/// child shares a value - e.g. a list of `<actionOverrides>` that all start
166/// with `<actionName>View</actionName>` - still produce distinct filenames
167/// reflecting their distinct content.
168pub fn parse_unique_id_element(element: &XmlElement, unique_id_elements: Option<&str>) -> String {
169 if let Some(ids) = unique_id_elements {
170 find_id_in_subtree(element, ids).unwrap_or_else(|| create_short_hash(element))
171 } else {
172 create_short_hash(element)
173 }
174}
175
/// Unit tests for unique-id resolution: single-field lookup, compound
/// candidates, and the outer-element hash fallback.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn finds_direct_field() {
        let el = json!({ "name": "Get_Info", "label": "Get Info" });
        assert_eq!(parse_unique_id_element(&el, Some("name")), "Get_Info");
    }

    #[test]
    fn finds_deeply_nested_field() {
        // value before connector so we find elementReference (matches TS iteration order)
        let el = json!({
            "value": { "elementReference": "accts.accounts" },
            "connector": { "targetReference": "X" }
        });
        assert_eq!(
            parse_unique_id_element(&el, Some("elementReference")),
            "accts.accounts"
        );
    }

    #[test]
    fn finds_id_in_grandchild() {
        let el = json!({
            "wrapper": {
                "inner": { "name": "NestedName" }
            }
        });
        assert_eq!(parse_unique_id_element(&el, Some("name")), "NestedName");
    }

    #[test]
    fn value_as_string_returns_none_for_non_string_non_text_objects() {
        // Directly named field exists but value is neither a string nor an object with #text.
        // Exercises the None-return path inside value_as_string plus the "no match, move on"
        // path inside match_candidate_at_direct.
        let el = json!({ "name": { "other": "xxx" } });
        let id = parse_unique_id_element(&el, Some("name"));
        // Falls through to the 8-char short-hash fallback.
        assert_eq!(id.len(), 8);
    }

    #[test]
    fn falls_back_to_hash_when_no_match_and_no_nested_object() {
        // No direct match and no nested object match → hash fallback.
        let el = json!({ "a": "string", "b": "another" });
        let id = parse_unique_id_element(&el, Some("name"));
        assert_eq!(id.len(), 8);
    }

    #[test]
    fn hash_fallback_when_unique_id_elements_is_none() {
        let el = json!({ "a": "b" });
        let id = parse_unique_id_element(&el, None);
        assert_eq!(id.len(), 8);
    }

    #[test]
    fn non_object_element_returns_hash() {
        let el = json!("just-a-string");
        let id = parse_unique_id_element(&el, Some("name"));
        assert_eq!(id.len(), 8);
    }

    #[test]
    fn finds_name_from_text_object() {
        // XML parser stores leaf elements as { "#text": "value" }
        let el = json!({
            "name": { "#text": "Get_Info" },
            "label": { "#text": "Get Info" },
            "actionName": { "#text": "GetFirstFromCollection" }
        });
        assert_eq!(parse_unique_id_element(&el, Some("name")), "Get_Info");
        assert_eq!(
            parse_unique_id_element(&el, Some("actionName")),
            "GetFirstFromCollection"
        );
    }

    // ---- regression: text-leaf siblings must NOT collapse to one hash ------

    /// Models a `<CustomApplication>`'s `<actionOverrides>`: every block has
    /// the same `<actionName>View</actionName>` first child but distinct
    /// `<content>` and `<pageOrSobjectType>` payloads. With the old
    /// implementation the recursion landed on `{"#text":"View"}` for every
    /// sibling and they all hashed to the same 8-char prefix, silently
    /// collapsing 100s of overrides into a single shard that contained only
    /// the last one written.
    #[test]
    fn distinct_siblings_with_shared_first_text_leaf_get_distinct_hashes() {
        let make_action_override = |i: u32| -> XmlElement {
            json!({
                "actionName": { "#text": "View" },
                "comment": { "#text": format!("Action override {i}") },
                "content": { "#text": format!("Sample_Page_{i:05}") },
                "formFactor": { "#text": "Large" },
                "skipRecordTypeSelect": { "#text": "false" },
                "type": { "#text": "Flexipage" },
                "pageOrSobjectType": { "#text": format!("Sample_Object_{i:03}__c") }
            })
        };

        // Default unique-id elements ("fullName,name") - none of these are
        // present on actionOverride children.
        let ids = Some("fullName,name");

        let mut seen = std::collections::HashSet::new();
        for i in 1..=128 {
            let id = parse_unique_id_element(&make_action_override(i), ids);
            assert_eq!(id.len(), 8, "expected an 8-char short hash, got {id}");
            assert!(
                seen.insert(id.clone()),
                "duplicate hash {id} for actionOverride {i} - distinct siblings collapsed"
            );
        }
    }

    /// Same shape but with no unique-id config at all: must also produce
    /// distinct hashes per sibling.
    #[test]
    fn distinct_siblings_get_distinct_hashes_with_no_unique_id_config() {
        let mut seen = std::collections::HashSet::new();
        for i in 1..=64 {
            let el = json!({
                "actionName": { "#text": "View" },
                "content": { "#text": format!("Page_{i}") }
            });
            let id = parse_unique_id_element(&el, None);
            assert!(
                seen.insert(id.clone()),
                "duplicate hash {id} at index {i} with no unique-id config"
            );
        }
    }

    /// `find_id_in_subtree` must skip text-leaf wrappers like
    /// `{"#text": "..."}` rather than treat them as recursable objects.
    /// Otherwise the search returns a hash of the inner wrapper rather than
    /// hashing the outer element.
    #[test]
    fn text_leaf_wrappers_are_not_recursable() {
        let leaf = json!({ "#text": "View" });
        assert!(!is_recursable_object(&leaf));

        let attrs_only = json!({ "@attr": "x", "#text": "y" });
        assert!(!is_recursable_object(&attrs_only));

        let real = json!({ "name": "x" });
        assert!(is_recursable_object(&real));

        let mixed = json!({ "@attr": "x", "name": "y" });
        assert!(is_recursable_object(&mixed));
    }

    // ---- compound-key support ----------------------------------------------

    /// A `<profileActionOverrides>` element with the full key set. The
    /// compound `actionName+pageOrSobjectType+formFactor+profile` must
    /// resolve to all four values joined with `__`.
    #[test]
    fn compound_resolves_when_all_fields_present() {
        let el = json!({
            "actionName": { "#text": "Tab" },
            "content": { "#text": "Home_Page_Default" },
            "formFactor": { "#text": "Large" },
            "pageOrSobjectType": { "#text": "standard-home" },
            "type": { "#text": "Flexipage" },
            "profile": { "#text": "Implementation_Lightning" }
        });
        let id =
            parse_unique_id_element(&el, Some("actionName+pageOrSobjectType+formFactor+profile"));
        assert_eq!(id, "Tab__standard-home__Large__Implementation_Lightning");
    }

    /// A compound that names a field the element doesn't have must NOT
    /// match - the next candidate (a narrower compound, then a single
    /// field) takes over.
    #[test]
    fn compound_falls_through_when_one_field_missing() {
        // `<actionOverrides>` (no profile, no recordType) - the wide compound
        // must fail, the narrow compound must succeed.
        let el = json!({
            "actionName": { "#text": "View" },
            "content": { "#text": "LUX_Case_Release_Candidate_Copy" },
            "formFactor": { "#text": "Large" },
            "pageOrSobjectType": { "#text": "Case" },
            "type": { "#text": "Flexipage" }
        });
        let spec = "actionName+pageOrSobjectType+formFactor+profile,actionName+pageOrSobjectType+formFactor,actionName";
        assert_eq!(
            parse_unique_id_element(&el, Some(spec)),
            "View__Case__Large"
        );
    }

    /// All compound candidates miss → the loop must fall back to the
    /// single-field candidate at the tail of the spec, and ultimately to
    /// the outer-element hash if even that misses.
    #[test]
    fn compound_then_single_then_hash_fallback() {
        let el = json!({
            "actionName": { "#text": "View" }
        });
        let spec_all_compound =
            "actionName+pageOrSobjectType+formFactor+profile,actionName+pageOrSobjectType";
        let id = parse_unique_id_element(&el, Some(spec_all_compound));
        assert_eq!(
            id.len(),
            8,
            "no candidate should match → hash fallback, got {id}"
        );

        let spec_with_single_tail = "actionName+pageOrSobjectType+formFactor,actionName";
        assert_eq!(
            parse_unique_id_element(&el, Some(spec_with_single_tail)),
            "View"
        );
    }

    /// Empty values (`<recordType></recordType>`) must be treated as
    /// missing for the purpose of compound matching - otherwise we would
    /// emit filenames like `View__Account__Large__` with a trailing
    /// separator and silently collide with siblings that genuinely lack
    /// the field.
    #[test]
    fn compound_treats_empty_values_as_missing() {
        let el = json!({
            "actionName": { "#text": "View" },
            "pageOrSobjectType": { "#text": "Account" },
            "recordType": { "#text": "" } // explicitly empty
        });
        let spec = "actionName+pageOrSobjectType+recordType,actionName+pageOrSobjectType";
        assert_eq!(
            parse_unique_id_element(&el, Some(spec)),
            "View__Account",
            "empty <recordType> must be treated as missing"
        );
    }

    /// Distinct profileActionOverrides siblings sharing actionName +
    /// pageOrSobjectType + formFactor but differing in `profile` must
    /// produce distinct compound IDs (not collide).
    #[test]
    fn compound_disambiguates_siblings_that_share_outer_fields() {
        let make = |profile: &str| {
            json!({
                "actionName": { "#text": "Tab" },
                "content": { "#text": "Home_Page_Default" },
                "formFactor": { "#text": "Large" },
                "pageOrSobjectType": { "#text": "standard-home" },
                "type": { "#text": "Flexipage" },
                "profile": { "#text": profile }
            })
        };
        let spec = "actionName+pageOrSobjectType+formFactor+profile";
        let a = parse_unique_id_element(&make("Implementation_Lightning"), Some(spec));
        let b = parse_unique_id_element(&make("Sales_Lightning"), Some(spec));
        assert_ne!(a, b);
        assert!(a.ends_with("Implementation_Lightning"));
        assert!(b.ends_with("Sales_Lightning"));
    }

    /// A single-field spec must behave identically to releases prior to
    /// compound-key support: same priority (direct first, then nested),
    /// same hash fallback, no spurious `__` separators.
    #[test]
    fn single_field_behaviour_is_unchanged() {
        let el = json!({ "name": "Get_Info", "label": "Get Info" });
        assert_eq!(parse_unique_id_element(&el, Some("name")), "Get_Info");

        // Nested lookup still works when there is no direct field.
        let nested = json!({
            "wrapper": { "name": "NestedName" }
        });
        assert_eq!(parse_unique_id_element(&nested, Some("name")), "NestedName");

        // Direct vs nested priority preserved: the direct field wins even
        // though the same field also exists deeper in the subtree.
        let both = json!({
            "name": "DirectName",
            "wrapper": { "name": "NestedName" }
        });
        assert_eq!(parse_unique_id_element(&both, Some("name")), "DirectName");

        // A later candidate found directly beats an earlier candidate that
        // only exists in a nested child.
        let mixed = json!({
            "name": "DirectName",
            "wrapper": { "fullName": "NestedFullName" }
        });
        assert_eq!(
            parse_unique_id_element(&mixed, Some("fullName,name")),
            "DirectName"
        );
    }

    /// Pathological/malformed specs - leading commas, stray `+`, all
    /// whitespace - must not panic and must degrade to hash fallback.
    #[test]
    fn malformed_spec_degrades_to_hash() {
        let el = json!({ "foo": "bar" });
        let id = parse_unique_id_element(&el, Some(",,+,, "));
        assert_eq!(id.len(), 8, "all-empty candidates → hash fallback");
    }

    /// Recursion must only return when a configured unique-id field is
    /// *actually* found, not when a recursive call falls back to its own
    /// hash. The hash is computed exactly once, at the top level, on the
    /// outer element.
    #[test]
    fn nested_search_does_not_return_inner_hash() {
        // Two distinct outer elements whose first recursable child has the
        // same shape. With the old behavior the recursion would compute a
        // hash of that inner child for both - same hash for distinct outers.
        // With the fix, each outer is hashed in full and they differ.
        let a = json!({
            "wrapper": { "leafA": "shared", "extraA": "different-A" },
            "outerA": "A"
        });
        let b = json!({
            "wrapper": { "leafA": "shared", "extraA": "different-A" },
            "outerB": "B"
        });
        let id_a = parse_unique_id_element(&a, Some("name"));
        let id_b = parse_unique_id_element(&b, Some("name"));
        assert_ne!(id_a, id_b);
    }
}
487}