Skip to main content

xml_disassembler/parsers/
parse_unique_id.rs

1//! Parse unique ID from XML element for file naming.
2
3use serde_json::Value;
4use sha2::{Digest, Sha256};
5
6use crate::types::XmlElement;
7
8/// Cache for stringified elements - we use a simple approach in Rust.
9/// For full equivalence we could use a type with interior mutability and weak refs.
10fn create_short_hash(element: &XmlElement) -> String {
11    let stringified = serde_json::to_string(element).unwrap_or_default();
12    let mut hasher = Sha256::new();
13    hasher.update(stringified.as_bytes());
14    let result = hasher.finalize();
15    const HEX: &[u8; 16] = b"0123456789abcdef";
16    let mut s = String::with_capacity(8);
17    for b in result.iter().take(4) {
18        s.push(HEX[(b >> 4) as usize] as char);
19        s.push(HEX[(b & 0xf) as usize] as char);
20    }
21    s
22}
23
24fn is_object(value: &Value) -> bool {
25    value.is_object() && !value.is_array()
26}
27
28/// Extract string from a value - handles both direct strings and objects with #text (XML leaf elements).
29fn value_as_string(value: &Value) -> Option<String> {
30    if let Some(s) = value.as_str() {
31        return Some(s.to_string());
32    }
33    value
34        .as_object()
35        .and_then(|obj| obj.get("#text"))
36        .and_then(|v| v.as_str())
37        .map(|s| s.to_string())
38}
39
40fn find_direct_field_match(element: &XmlElement, field_names: &[&str]) -> Option<String> {
41    let obj = element.as_object()?;
42    for name in field_names {
43        if let Some(value) = obj.get(*name) {
44            if let Some(s) = value_as_string(value) {
45                return Some(s);
46            }
47        }
48    }
49    None
50}
51
52fn find_nested_field_match(element: &XmlElement, unique_id_elements: &str) -> Option<String> {
53    let obj = element.as_object()?;
54    for (_, child) in obj {
55        if is_object(child) {
56            // parse_unique_id_element always returns a non-empty string (falls back to a hash),
57            // so the first nested object match is sufficient.
58            return Some(parse_unique_id_element(child, Some(unique_id_elements)));
59        }
60    }
61    None
62}
63
64/// Get a unique ID for an element, using configured fields or a hash.
65pub fn parse_unique_id_element(element: &XmlElement, unique_id_elements: Option<&str>) -> String {
66    if let Some(ids) = unique_id_elements {
67        let field_names: Vec<&str> = ids.split(',').map(|s| s.trim()).collect();
68        find_direct_field_match(element, &field_names)
69            .or_else(|| find_nested_field_match(element, ids))
70            .unwrap_or_else(|| create_short_hash(element))
71    } else {
72        create_short_hash(element)
73    }
74}
75
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Length of the identifier produced by the hash fallback.
    const SHORT_HASH_LEN: usize = 8;

    #[test]
    fn finds_direct_field() {
        let element = json!({ "name": "Get_Info", "label": "Get Info" });
        let id = parse_unique_id_element(&element, Some("name"));
        assert_eq!(id, "Get_Info");
    }

    #[test]
    fn finds_deeply_nested_field() {
        // "value" is listed before "connector" so that the nested lookup reaches
        // elementReference first (matches TS iteration order).
        let element = json!({
            "value": { "elementReference": "accts.accounts" },
            "connector": { "targetReference": "X" }
        });
        let id = parse_unique_id_element(&element, Some("elementReference"));
        assert_eq!(id, "accts.accounts");
    }

    #[test]
    fn finds_id_in_grandchild() {
        // The field sits two levels below the element being identified.
        let element = json!({
            "wrapper": {
                "inner": { "name": "NestedName" }
            }
        });
        let id = parse_unique_id_element(&element, Some("name"));
        assert_eq!(id, "NestedName");
    }

    #[test]
    fn value_as_string_returns_none_for_non_string_non_text_objects() {
        // The named field exists, but its value is neither a string nor an object
        // carrying #text. This covers the None path of value_as_string and the
        // "no match, keep looking" path of find_direct_field_match.
        let element = json!({ "name": { "other": "xxx" } });
        let id = parse_unique_id_element(&element, Some("name"));
        // With no usable field anywhere, the result is an 8-char short hash.
        assert_eq!(id.len(), SHORT_HASH_LEN);
    }

    #[test]
    fn falls_back_to_hash_when_no_match_and_no_nested_object() {
        // Neither a direct match nor any object-valued child: hash fallback.
        let element = json!({ "a": "string", "b": "another" });
        let id = parse_unique_id_element(&element, Some("name"));
        assert_eq!(id.len(), SHORT_HASH_LEN);
    }

    #[test]
    fn hash_fallback_when_unique_id_elements_is_none() {
        let element = json!({ "a": "b" });
        let id = parse_unique_id_element(&element, None);
        assert_eq!(id.len(), SHORT_HASH_LEN);
    }

    #[test]
    fn non_object_element_returns_hash() {
        let element = json!("just-a-string");
        let id = parse_unique_id_element(&element, Some("name"));
        assert_eq!(id.len(), SHORT_HASH_LEN);
    }

    #[test]
    fn finds_name_from_text_object() {
        // The XML parser stores leaf elements as { "#text": "value" }.
        let element = json!({
            "name": { "#text": "Get_Info" },
            "label": { "#text": "Get Info" },
            "actionName": { "#text": "GetFirstFromCollection" }
        });

        let by_name = parse_unique_id_element(&element, Some("name"));
        assert_eq!(by_name, "Get_Info");

        let by_action = parse_unique_id_element(&element, Some("actionName"));
        assert_eq!(by_action, "GetFirstFromCollection");
    }
}