Skip to main content

exiftool_rs/metadata/
xmp.rs

1//! XMP (Extensible Metadata Platform) reader.
2//!
3//! Parses Adobe XMP metadata stored as XML/RDF. Mirrors ExifTool's XMP.pm.
4
5use crate::error::{Error, Result};
6use crate::tag::{Tag, TagGroup, TagId};
7use crate::value::Value;
8
9use xml::reader::{EventReader, XmlEvent};
10
11/// XMP metadata reader.
12pub struct XmpReader;
13
/// Map a well-known XMP namespace URI to its conventional prefix.
///
/// Returns the empty string when `uri` is not one of the namespaces listed
/// below, which lets callers fall back to another prefix of their choosing.
///
/// The result is always a string literal, so it is `'static` and does not
/// borrow from `uri`; callers may keep it after the URI has been dropped.
fn namespace_prefix(uri: &str) -> &'static str {
    match uri {
        "http://purl.org/dc/elements/1.1/" => "dc",
        "http://ns.adobe.com/xap/1.0/" => "xmp",
        "http://ns.adobe.com/xap/1.0/mm/" => "xmpMM",
        "http://ns.adobe.com/xap/1.0/rights/" => "xmpRights",
        "http://ns.adobe.com/tiff/1.0/" => "tiff",
        "http://ns.adobe.com/exif/1.0/" => "exif",
        "http://ns.adobe.com/exif/1.0/aux/" => "aux",
        "http://ns.adobe.com/photoshop/1.0/" => "photoshop",
        "http://ns.adobe.com/camera-raw-settings/1.0/" => "crs",
        "http://ns.adobe.com/lightroom/1.0/" => "lr",
        "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/" => "Iptc4xmpCore",
        "http://iptc.org/std/Iptc4xmpExt/2008-02-29/" => "Iptc4xmpExt",
        "http://ns.google.com/photos/1.0/camera/" => "GCamera",
        "http://ns.google.com/photos/1.0/image/" => "GImage",
        "http://ns.google.com/photos/1.0/container/" => "GContainer",
        "http://ns.google.com/photos/1.0/container/item/" => "GContainerItem",
        "http://ns.google.com/photos/dd/1.0/device/" => "GDevice",
        "http://ns.adobe.com/xmp/note/" => "xmpNote",
        "adobe:ns:meta/" => "x",
        "http://ns.adobe.com/pdf/1.3/" => "pdf",
        "http://ns.adobe.com/xap/1.0/bj/" => "xmpBJ",
        "http://ns.adobe.com/xap/1.0/sType/Job#" => "stJob",
        "http://ns.adobe.com/xap/1.0/t/pg/" => "xmpTPg",
        "http://ns.adobe.com/xap/1.0/g/" => "xmpG",
        "http://ns.adobe.com/xap/1.0/g/img/" => "xmpGImg",
        "http://ns.adobe.com/xap/1.0/sType/Dimensions#" => "stDim",
        "http://ns.adobe.com/xap/1.0/sType/ResourceRef#" => "stRef",
        "http://ns.adobe.com/xap/1.0/sType/Font#" => "stFnt",
        "http://ns.adobe.com/xap/1.0/sType/ManifestItem#" => "stMfs",
        "http://www.w3.org/2000/01/rdf-schema#" => "rdfs",
        "http://ns.microsoft.com/photo/1.0/" => "MicrosoftPhoto",
        "http://ns.useplus.org/ldf/xmp/1.0/" => "plus",
        "http://ns.adobe.com/xap/1.0/sType/Area#" => "stArea",
        "http://www.metadataworkinggroup.com/schemas/regions/" => "mwg-rs",
        "http://www.metadataworkinggroup.com/schemas/keywords/" => "mwg-kw",
        // Unknown namespace: signal "no canonical prefix" with an empty string.
        _ => "",
    }
}
55
/// Category (ExifTool "family 2" style group name) for an XMP namespace prefix.
///
/// Only a handful of prefixes have a dedicated category; every other prefix,
/// including `xmp`, `xmpMM`, `xmpRights`, `Iptc4xmpCore` and `Iptc4xmpExt`,
/// falls into `"Other"`.
///
/// The result is always a string literal, so it is `'static` and does not
/// borrow from `prefix`.
fn namespace_category(prefix: &str) -> &'static str {
    match prefix {
        "dc" => "Author",
        "tiff" | "photoshop" => "Image",
        "exif" | "aux" => "Camera",
        _ => "Other",
    }
}
68
/// Report whether `local_name` names the GCamera HDR+ makernote attribute.
///
/// Both spellings are accepted: `HdrPlusMakernote` and `hdrp_makernote`.
/// The comparison is exact and case-sensitive.
fn is_hdrp_makernote_attr(local_name: &str) -> bool {
    matches!(local_name, "HdrPlusMakernote" | "hdrp_makernote")
}
74
75/// Emit the HDRPlusMakerNote binary tag + all decoded HDRP sub-tags.
76fn emit_hdrp_makernote(b64_value: &str, tags: &mut Vec<Tag>) {
77    use crate::metadata::google_hdrp::decode_hdrp_makernote;
78
79    // Emit HDRPlusMakerNote as a binary tag (ExifTool shows "(Binary data N bytes...)")
80    let raw_bytes = b64_value.trim().len() * 3 / 4; // approximate decoded size
81    let print = format!("(Binary data {} bytes, use -b option to extract)", raw_bytes);
82    tags.push(Tag {
83        id: TagId::Text("GCamera:HdrPlusMakernote".into()),
84        name: "HDRPlusMakerNote".into(),
85        description: "HDRPlusMakerNote".into(),
86        group: TagGroup {
87            family0: "XMP".into(),
88            family1: "XMP-GCamera".into(),
89            family2: "Other".into(),
90        },
91        raw_value: Value::String(b64_value.to_string()),
92        print_value: print,
93        priority: 0,
94    });
95
96    // Decode and emit HDRP protobuf sub-tags
97    let hdrp_tags = decode_hdrp_makernote(b64_value);
98    tags.extend(hdrp_tags);
99}
100
101impl XmpReader {
102    /// Parse XMP metadata from an XML byte slice.
103    pub fn read(data: &[u8]) -> Result<Vec<Tag>> {
104        let mut tags = Vec::new();
105
106        // Handle UTF-16/32 BOM and convert to UTF-8 (from Perl XMP.pm line 4286)
107        // For UTF-16 inputs we need an owned String to borrow from; for UTF-8 we borrow directly.
108        let converted: Option<String> = if data.starts_with(&[0xFE, 0xFF]) {
109            let units: Vec<u16> = data[2..].chunks_exact(2)
110                .map(|c| u16::from_be_bytes([c[0], c[1]])).collect();
111            Some(String::from_utf16_lossy(&units))
112        } else if data.starts_with(&[0xFF, 0xFE]) && !data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]) {
113            let units: Vec<u16> = data[2..].chunks_exact(2)
114                .map(|c| u16::from_le_bytes([c[0], c[1]])).collect();
115            Some(String::from_utf16_lossy(&units))
116        } else if data.len() > 4 && data[0] == 0 && data[1] != 0 {
117            // UTF-16 BE without BOM (starts with \0<)
118            let units: Vec<u16> = data.chunks_exact(2)
119                .map(|c| u16::from_be_bytes([c[0], c[1]])).collect();
120            Some(String::from_utf16_lossy(&units))
121        } else {
122            None
123        };
124        let xml_data: &str = if let Some(ref s) = converted {
125            s.as_str()
126        } else if data.starts_with(&[0xEF, 0xBB, 0xBF]) {
127            // UTF-8 BOM — skip it
128            std::str::from_utf8(&data[3..])
129                .map_err(|e| Error::InvalidXmp(format!("invalid UTF-8: {}", e)))?
130        } else {
131            // UTF-8 (default)
132            std::str::from_utf8(data)
133                .or_else(|_| {
134                    let trimmed = &data[..data.iter().rposition(|&b| b == b'>').unwrap_or(0) + 1];
135                    std::str::from_utf8(trimmed)
136                })
137                .map_err(|e| Error::InvalidXmp(format!("invalid UTF-8: {}", e)))?
138        };
139
140        // Pre-pass: collect rdf:nodeID-mapped bag/seq values for later reference resolution.
141        // Also strip invalid XML chars from xpacket processing instructions.
142        let xml_sanitized: String = sanitize_xmp_xml(xml_data);
143        let xml_clean: String = fix_malformed_xml(&xml_sanitized);
144        let xml_for_parse: &str = &xml_clean;
145
146        // INX detection: InDesign Interchange format — XMP is embedded in CDATA
147        // Detect by: starts with <?xml, followed by <?aid on next line
148        let is_inx = {
149            let trimmed = xml_for_parse.trim_start();
150            trimmed.starts_with("<?xml") && {
151                // Look for <?aid on one of the first few lines
152                trimmed.lines().take(5).any(|l| l.trim_start().starts_with("<?aid "))
153            }
154        };
155        if is_inx {
156            // Extract XMP from CDATA: find '<![CDATA[<?xpacket begin' ... '<?xpacket end...?>]]>'
157            if let Some(cdata_start) = xml_for_parse.find("<![CDATA[<?xpacket begin") {
158                let xmp_start = cdata_start + 9; // skip '<![CDATA['
159                // Find the end: '<?xpacket end="r"?>]]>' or '<?xpacket end="w"?>]]>'
160                if let Some(end_marker) = xml_for_parse[xmp_start..].find("<?xpacket end=") {
161                    let after_end = xmp_start + end_marker;
162                    if let Some(close) = xml_for_parse[after_end..].find("?>") {
163                        let xmp_end = after_end + close + 2; // include '?>'
164                        let xmp_data = &xml_for_parse[xmp_start..xmp_end];
165                        // Recursively parse the embedded XMP
166                        let xmp_bytes = xmp_data.as_bytes().to_vec();
167                        return XmpReader::read(&xmp_bytes);
168                    }
169                }
170            }
171            return Ok(tags);
172        }
173
174        // Check if this is RDF/XMP format or generic XML
175        let is_rdf = xml_for_parse.contains("rdf:RDF") || xml_for_parse.contains("rdf:Description");
176        if !is_rdf {
177            // Generic XML: extract tags by building tag names from element paths
178            return read_generic_xml(xml_for_parse);
179        }
180
181        // Pre-pass: collect rdf:nodeID → list values (for Bag/Seq with nodeIDs)
182        let node_bags: std::collections::HashMap<String, Vec<String>> =
183            collect_node_bag_values(xml_for_parse);
184
185        // Pre-pass: collect all properties of blank nodes (rdf:nodeID Descriptions)
186        // Maps nodeID → Vec<(ns_uri, local_name, value)>
187        let blank_node_props: std::collections::HashMap<String, Vec<(String, String, String)>> =
188            collect_blank_node_properties(xml_for_parse);
189
190        // Pre-pass: find nodeIDs that are "inline referenced" — i.e., they appear as
191        // <rdf:Description rdf:nodeID="X"> INSIDE another property element (not at the RDF top level).
192        // These blank nodes should suppress direct property emission from top-level descriptions.
193        let inline_referenced_node_ids: std::collections::HashSet<String> =
194            collect_inline_referenced_node_ids(xml_for_parse);
195
196        let parser = EventReader::from_str(xml_for_parse);
197        let mut path: Vec<(String, String)> = Vec::new(); // (namespace, local_name)
198        let mut current_text = String::new();
199        let mut in_rdf_li = false;
200        let mut list_values: Vec<String> = Vec::new();
201        // Track depths where we should emit even with empty text (ExifTool et:id format)
202        let mut emit_empty_depths: std::collections::HashSet<usize> = std::collections::HashSet::new();
203        // Track elements with rdf:parseType='Resource' (bare structs).
204        // Each entry is the path depth at which we entered such an element.
205        let mut parse_resource_depths: Vec<usize> = Vec::new();
206
207        // Blank node tracking: when we enter a <rdf:Description rdf:nodeID="X"> inside a property,
208        // track the nodeID and parent property so we can emit all blank node props on close.
209        // Stack of (nodeID, parent_local_name) for inline blank node Descriptions.
210        let mut inline_blank_node_stack: Vec<(String, String)> = Vec::new();
211        // Track depth of top-level blank-node Descriptions (parent is rdf:RDF or rdf:Description without property parent).
212        // Properties inside these should NOT be emitted directly — only via blank node references.
213        let mut suppress_direct_emit_depth: Option<usize> = None;
214
215        // GContainer struct: collect per-field lists for DirectoryItemMime/Semantic/Length.
216        // Key: flat field name (e.g. "Mime", "Semantic", "Length"), Values: collected per li.
217        let mut gcontainer_fields: std::collections::HashMap<String, Vec<String>> =
218            std::collections::HashMap::new();
219        // Whether we're currently inside a GContainer:Directory/Seq context.
220        let mut in_gcontainer_seq = false;
221        // Whether we're inside a GContainer:Directory/Seq/li (struct li).
222        let mut in_gcontainer_li = false;
223
224        // Language-alt tracking:
225        // - current_li_lang: the xml:lang value on the current inner rdf:li
226        // - in_lang_alt: we're inside a rdf:Alt element
227        // - lang_alt_in_bag: the rdf:Alt is itself inside an outer rdf:li (Bag-of-lang-alt)
228        // - bag_lang_values: per-lang accumulated list for bag-of-lang-alt
229        // - bag_item_count: number of Bag items processed (for empty-slot tracking)
230        let mut current_li_lang: Option<String> = None;
231        let mut in_lang_alt = false;
232        let mut lang_alt_in_bag = false;
233        let mut bag_lang_values: std::collections::HashMap<String, Vec<Option<String>>> =
234            std::collections::HashMap::new();
235        let mut bag_item_count: usize = 0;
236
237        for event in parser {
238            match event {
239                Ok(XmlEvent::StartElement {
240                    name, attributes, ..
241                }) => {
242                    // Track the path
243                    let ns_uri = name.namespace.as_deref().unwrap_or("");
244                    path.push((ns_uri.to_string(), name.local_name.clone()));
245                    current_text.clear();
246
247                    // Track elements with et:id (ExifTool internal format): emit even if empty
248                    let has_et_id = attributes.iter().any(|a| {
249                        a.name.local_name == "id"
250                            && (a.name.prefix.as_deref() == Some("et")
251                                || a.name.namespace.as_deref() == Some("http://ns.exiftool.org/1.0/"))
252                    });
253                    if has_et_id {
254                        emit_empty_depths.insert(path.len());
255                    }
256
257                    // Track rdf:parseType='Resource' (bare struct context)
258                    let has_parse_resource = attributes.iter().any(|a| {
259                        a.name.local_name == "parseType"
260                            && (a.name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
261                                || a.name.prefix.as_deref() == Some("rdf"))
262                            && a.value == "Resource"
263                    });
264                    if has_parse_resource {
265                        parse_resource_depths.push(path.len());
266                    }
267
268                    // x:xmpmeta or x:xapmeta — extract XMPToolkit from x:xmptk/x:xaptk attribute
269                    if name.local_name == "xmpmeta" || name.local_name == "xapmeta" {
270                        for attr in &attributes {
271                            if attr.name.local_name == "xmptk" || attr.name.local_name == "xaptk" {
272                                tags.push(Tag {
273                                    id: TagId::Text("x:xmptk".into()),
274                                    name: "XMPToolkit".into(),
275                                    description: "XMP Toolkit".into(),
276                                    group: TagGroup { family0: "XMP".into(), family1: "XMP-x".into(), family2: "Other".into() },
277                                    raw_value: Value::String(attr.value.clone()),
278                                    print_value: attr.value.clone(),
279                                    priority: 0,
280                                });
281                            }
282                        }
283                    }
284
285                    // Check if this element has a rdf:nodeID reference to a known bag/seq or blank node.
286                    // E.g., <dc:subject rdf:nodeID="anon2"/> — emit the bag values as a tag.
287                    // E.g., <ph:tester rdf:nodeID="abc"/> — emit all blank node properties as TesterXxx.
288                    // This is for non-Description elements that reference a nodeID bag/blank-node.
289                    if name.local_name != "Description"
290                        && name.local_name != "RDF"
291                        && name.namespace.as_deref() != Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
292                    {
293                        if let Some(node_ref) = attributes.iter().find(|a| {
294                            a.name.local_name == "nodeID"
295                                && (a.name.prefix.as_deref() == Some("rdf")
296                                    || a.name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#"))
297                        }) {
298                            let node_id = &node_ref.value;
299                            if let Some(bag_values) = node_bags.get(node_id) {
300                                let ns_uri = name.namespace.as_deref().unwrap_or("");
301                                let prefix = namespace_prefix(ns_uri);
302                                let group_prefix = if prefix.is_empty() {
303                                    name.prefix.as_deref().unwrap_or("XMP")
304                                } else {
305                                    prefix
306                                };
307                                let category = namespace_category(group_prefix);
308                                let full_name = ucfirst(&name.local_name);
309                                let value = if bag_values.len() == 1 {
310                                    Value::String(bag_values[0].clone())
311                                } else {
312                                    Value::List(bag_values.iter().map(|s| Value::String(s.clone())).collect())
313                                };
314                                let pv = value.to_display_string();
315                                tags.push(Tag {
316                                    id: TagId::Text(format!("{}:{}", group_prefix, name.local_name)),
317                                    name: full_name.clone(),
318                                    description: full_name,
319                                    group: TagGroup {
320                                        family0: "XMP".into(),
321                                        family1: format!("XMP-{}", group_prefix),
322                                        family2: category.into(),
323                                    },
324                                    raw_value: value,
325                                    print_value: pv,
326                                    priority: 0,
327                                });
328                            }
329                            // Blank node properties: emit all properties prefixed with this element's name
330                            if let Some(bn_props) = blank_node_props.get(node_id) {
331                                let elem_ns = name.namespace.as_deref().unwrap_or("");
332                                let elem_prefix_ns = namespace_prefix(elem_ns);
333                                let elem_group = if elem_prefix_ns.is_empty() {
334                                    name.prefix.as_deref().unwrap_or("XMP")
335                                } else { elem_prefix_ns };
336                                let parent_uc = ucfirst(&strip_non_ascii(&name.local_name));
337                                // Build prefix from ancestor path + this element
338                                let anc_prefix = build_struct_tag_prefix_without_last(&path, &name.local_name);
339                                let elem_flat = if anc_prefix.is_empty() {
340                                    parent_uc.clone()
341                                } else {
342                                    let stripped = strip_struct_prefix(&anc_prefix, &parent_uc);
343                                    format!("{}{}", anc_prefix, stripped)
344                                };
345                                for (prop_ns, prop_local, prop_val) in bn_props {
346                                    let prop_prefix = namespace_prefix(prop_ns);
347                                    let prop_group = if prop_prefix.is_empty() { elem_group } else { prop_prefix };
348                                    let prop_cat = namespace_category(prop_group);
349                                    let prop_uc = ucfirst(&strip_non_ascii(prop_local));
350                                    let stripped = strip_struct_prefix(&elem_flat, &prop_uc);
351                                    let flat_raw = format!("{}{}", elem_flat, stripped);
352                                    let flat = apply_flat_name_remap(&flat_raw).to_string();
353                                    tags.push(Tag {
354                                        id: TagId::Text(format!("{}:{}", prop_group, flat)),
355                                        name: flat.clone(),
356                                        description: flat,
357                                        group: TagGroup {
358                                            family0: "XMP".into(),
359                                            family1: format!("XMP-{}", prop_group),
360                                            family2: prop_cat.into(),
361                                        },
362                                        raw_value: Value::String(prop_val.clone()),
363                                        print_value: prop_val.clone(),
364                                        priority: 0,
365                                    });
366                                }
367                            }
368                        }
369                    }
370
371                    // Handle rdf:resource attribute on property elements (RDF/XML shorthand).
372                    // E.g., <rdfs:seeAlso rdf:resource='plus:Licensee'/> → SeeAlso = plus:Licensee
373                    // This is like a simple text value but provided via rdf:resource attribute.
374                    // Skip if inside a suppressed blank-node Description.
375                    let in_suppressed_bn = suppress_direct_emit_depth
376                        .map(|d| path.len() > d)
377                        .unwrap_or(false);
378                    if name.local_name != "Description"
379                        && name.local_name != "RDF"
380                        && !in_suppressed_bn
381                        && name.namespace.as_deref() != Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
382                    {
383                        if let Some(res_attr) = attributes.iter().find(|a| {
384                            a.name.local_name == "resource"
385                                && (a.name.prefix.as_deref() == Some("rdf")
386                                    || a.name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#"))
387                        }) {
388                            let resource_val = res_attr.value.clone();
389                            let ns_uri = name.namespace.as_deref().unwrap_or("");
390                            let prefix = namespace_prefix(ns_uri);
391                            let group_prefix = if prefix.is_empty() {
392                                name.prefix.as_deref().unwrap_or("XMP")
393                            } else {
394                                prefix
395                            };
396                            let category = namespace_category(group_prefix);
397                            // Build full tag name using ancestor path
398                            let remapped = remap_xmp_tag_name(group_prefix, &name.local_name);
399                            let full_name = if !parse_resource_depths.is_empty() {
400                                let ancestor_prefix = build_struct_tag_prefix_without_last(&path, &name.local_name);
401                                if !ancestor_prefix.is_empty() {
402                                    let field_stripped = strip_struct_prefix(&ancestor_prefix, &remapped);
403                                    let candidate = format!("{}{}", ancestor_prefix, field_stripped);
404                                    apply_flat_name_remap(&candidate).to_string()
405                                } else {
406                                    apply_flat_name_remap(&remapped).to_string()
407                                }
408                            } else {
409                                apply_flat_name_remap(&remapped).to_string()
410                            };
411                            tags.push(Tag {
412                                id: TagId::Text(format!("{}:{}", group_prefix, name.local_name)),
413                                name: full_name.clone(),
414                                description: full_name,
415                                group: TagGroup {
416                                    family0: "XMP".into(),
417                                    family1: format!("XMP-{}", group_prefix),
418                                    family2: category.into(),
419                                },
420                                raw_value: Value::String(resource_val.clone()),
421                                print_value: resource_val,
422                                priority: 0,
423                            });
424                        }
425                    }
426
427                    // Pre-check: is this a top-level nodeID Description that should suppress direct emission?
428                    // Only suppress if the nodeID is also referenced inline (inside a property element).
429                    let rdf_ns_check = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
430                    let desc_node_id = attributes.iter().find(|a| {
431                        a.name.local_name == "nodeID"
432                            && (a.name.prefix.as_deref() == Some("rdf") || a.name.namespace.as_deref() == Some(rdf_ns_check))
433                    }).map(|a| a.value.clone());
434                    let is_top_level_blank_node_desc = name.local_name == "Description"
435                        && name.namespace.as_deref() == Some(rdf_ns_check)
436                        && desc_node_id.as_ref().map(|nid| inline_referenced_node_ids.contains(nid.as_str())).unwrap_or(false)
437                        && path.iter().rev().nth(1)
438                            .map(|(ns, ln)| ns == rdf_ns_check || ln == "RDF" || ln == "xmpmeta" || ln == "xapmeta")
439                            .unwrap_or(true); // if no parent, treat as top-level
440
441                    // Extract attributes on rdf:Description as tags
442                    // e.g., <rdf:Description GCamera:HdrPlusMakernote="...">
443                    // Skip if this is a top-level blank node Description (its attrs stored in blank_node_props).
444                    if name.local_name == "Description" && !is_top_level_blank_node_desc {
445                        for attr in &attributes {
446                            // Emit rdf:about as "About" tag, skip xmlns
447                            if attr.name.local_name == "about" {
448                                if !attr.value.is_empty() {
449                                    tags.push(Tag {
450                                        id: TagId::Text("rdf:about".into()),
451                                        name: "About".into(), description: "About".into(),
452                                        group: TagGroup { family0: "XMP".into(), family1: "XMP-rdf".into(), family2: "Other".into() },
453                                        raw_value: Value::String(attr.value.clone()),
454                                        print_value: attr.value.clone(), priority: 0,
455                                    });
456                                }
457                                continue;
458                            }
459                            // Skip rdf:nodeID on Description (it's just an identifier, not a value)
460                            if attr.name.local_name == "nodeID"
461                                && (attr.name.prefix.as_deref() == Some("rdf")
462                                    || attr.name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#"))
463                            {
464                                continue;
465                            }
466                            if attr.name.prefix.as_deref() == Some("xmlns") { continue; }
467                            if attr.name.local_name.starts_with("xmlns") { continue; }
468                            // Skip ExifTool-internal attributes (et:toolkit, et:id, et:desc, etc.)
469                            if attr.name.prefix.as_deref() == Some("et")
470                                || attr.name.namespace.as_deref() == Some("http://ns.exiftool.org/1.0/")
471                                || attr.name.namespace.as_deref() == Some("http://ns.exiftool.ca/1.0/")
472                            { continue; }
473
474                            let attr_ns = attr.name.namespace.as_deref().unwrap_or("");
475                            let attr_prefix = namespace_prefix(attr_ns);
476                            let group_prefix = if attr_prefix.is_empty() {
477                                attr.name.prefix.as_deref().unwrap_or("XMP")
478                            } else {
479                                attr_prefix
480                            };
481
482                            {
483                                // Special handling: GCamera:HdrPlusMakernote / GCamera:hdrp_makernote
484                                // → emit HDRPlusMakerNote (binary) + decode HDRP sub-tags (non-empty only)
485                                if !attr.value.is_empty() && group_prefix == "GCamera" && is_hdrp_makernote_attr(&attr.name.local_name) {
486                                    emit_hdrp_makernote(&attr.value, &mut tags);
487                                    continue;
488                                }
489
490                                let category = namespace_category(group_prefix);
491                                let remapped = remap_xmp_tag_name(group_prefix, &attr.name.local_name);
492                                // If Description is inline inside a property element,
493                                // prefix the tag with the property element's name.
494                                // path = [..., parent_prop, Description] → parent_prop is rev().nth(1)
495                                let rdf_ns2 = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
496                                let full_name = if let Some(parent_elem) = path.iter().rev().nth(1) {
497                                    if parent_elem.0 != rdf_ns2
498                                        && parent_elem.1 != "RDF"
499                                        && parent_elem.1 != "xmpmeta"
500                                        && parent_elem.1 != "xapmeta"
501                                    {
502                                        let parent_uc = ucfirst(&strip_non_ascii(&parent_elem.1));
503                                        let field_uc = ucfirst(&strip_non_ascii(&attr.name.local_name));
504                                        let stripped = strip_struct_prefix(&parent_uc, &field_uc);
505                                        apply_flat_name_remap(&format!("{}{}", parent_uc, stripped)).to_string()
506                                    } else {
507                                        remapped
508                                    }
509                                } else {
510                                    remapped
511                                };
512                                tags.push(Tag {
513                                    id: TagId::Text(format!("{}:{}", group_prefix, attr.name.local_name)),
514                                    name: full_name.clone(),
515                                    description: full_name,
516                                    group: TagGroup {
517                                        family0: "XMP".to_string(),
518                                        family1: format!("XMP-{}", group_prefix),
519                                        family2: category.to_string(),
520                                    },
521                                    raw_value: parse_xmp_value(&attr.value),
522                                    print_value: attr.value.clone(),
523                                    priority: 0,
524                                });
525                            }
526                        }
527                    }
528
529                    // Track inline blank-node Descriptions: <rdf:Description rdf:nodeID="X"> inside a property.
530                    // When this Description closes, we emit ALL blank node X properties with the parent prefix.
531                    // Top-level blank-node Descriptions that are inline-referenced suppress direct emission.
532                    // Top-level blank-node Descriptions NOT inline-referenced emit their props directly here.
533                    let rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
534                    if name.local_name == "Description" && name.namespace.as_deref() == Some(rdf_ns) {
535                        if let Some(nid_attr) = attributes.iter().find(|a| {
536                            a.name.local_name == "nodeID"
537                                && (a.name.prefix.as_deref() == Some("rdf") || a.name.namespace.as_deref() == Some(rdf_ns))
538                        }) {
539                            let nid = nid_attr.value.clone();
540                            // Check if parent is a non-RDF property element (inline reference)
541                            let parent_is_property = path.iter().rev().nth(1).map(|(pns, pln)| {
542                                pns != rdf_ns && pln != "RDF" && pln != "xmpmeta" && pln != "xapmeta"
543                            }).unwrap_or(false);
544
545                            if parent_is_property {
546                                // Inline blank-node: parent property will claim these props
547                                if let Some(parent) = path.iter().rev().nth(1) {
548                                    inline_blank_node_stack.push((nid, parent.1.clone()));
549                                }
550                            } else if inline_referenced_node_ids.contains(nid.as_str()) {
551                                // Top-level nodeID Description that IS referenced inline elsewhere:
552                                // suppress direct emission here (props will be emitted when the inline ref is processed).
553                                suppress_direct_emit_depth = Some(path.len());
554                            } else {
555                                // Top-level nodeID Description NOT referenced inline anywhere:
556                                // emit all its blank-node properties directly now.
557                                if let Some(bn_props) = blank_node_props.get(nid.as_str()) {
558                                    for (prop_ns, prop_local, prop_val) in bn_props.clone() {
559                                        let prop_prefix = namespace_prefix(&prop_ns);
560                                        let prop_prefix = if prop_prefix.is_empty() { "XMP" } else { prop_prefix };
561                                        let category = namespace_category(prop_prefix);
562                                        let remapped = remap_xmp_tag_name(prop_prefix, &prop_local);
563                                        tags.push(Tag {
564                                            id: TagId::Text(format!("{}:{}", prop_prefix, prop_local)),
565                                            name: remapped.clone(),
566                                            description: remapped,
567                                            group: TagGroup {
568                                                family0: "XMP".to_string(),
569                                                family1: format!("XMP-{}", prop_prefix),
570                                                family2: category.to_string(),
571                                            },
572                                            raw_value: parse_xmp_value(&prop_val),
573                                            print_value: prop_val.clone(),
574                                            priority: 0,
575                                        });
576                                    }
577                                }
578                                // Suppress the rest of the Description processing (already emitted)
579                                suppress_direct_emit_depth = Some(path.len());
580                            }
581                        }
582                    }
583
584                    // Extract attributes on non-RDF struct elements (shorthand struct values).
585                    // E.g., <exif:Flash exif:Fired="False" exif:Mode="2" .../>
586                    //        <xapMM:DerivedFrom stRef:instanceID="..." stRef:documentID="..."/>
587                    // These attributes are struct fields, flattened as ParentFieldName.
588                    // Only apply when the element is NOT a Description and NOT an RDF structural element.
589                    let is_rdf_structural = name.namespace.as_deref() == Some(rdf_ns)
590                        || name.local_name == "Description"
591                        || name.local_name == "RDF"
592                        || name.local_name == "li"
593                        || name.local_name == "Seq"
594                        || name.local_name == "Bag"
595                        || name.local_name == "Alt"
596                        || name.local_name == "xmpmeta"
597                        || name.local_name == "xapmeta"
598                        || name.namespace.as_deref() == Some("adobe:ns:meta/");
599                    if !is_rdf_structural && !attributes.is_empty() {
600                        let elem_ns = name.namespace.as_deref().unwrap_or("");
601                        let elem_prefix = namespace_prefix(elem_ns);
602                        let elem_group = if elem_prefix.is_empty() {
603                            name.prefix.as_deref().unwrap_or("XMP")
604                        } else {
605                            elem_prefix
606                        };
607                        // Build struct parent context: path of ancestor names BEFORE this element.
608                        // path already includes the current element (just pushed), so we use
609                        // build_struct_tag_prefix_without_last to exclude the current element.
610                        let ancestors_prefix = build_struct_tag_prefix_without_last(&path, &name.local_name);
611                        let elem_uc = ucfirst(&strip_non_ascii(&name.local_name));
612                        // Full path including this element: ancestors_prefix + elem_uc (with strip)
613                        let elem_flat = if ancestors_prefix.is_empty() {
614                            elem_uc.clone()
615                        } else {
616                            let stripped = strip_struct_prefix(&ancestors_prefix, &elem_uc);
617                            format!("{}{}", ancestors_prefix, stripped)
618                        };
619
620                        for attr in &attributes {
621                            // Skip xmlns, rdf:*, et:*, xml:* attributes
622                            if attr.name.prefix.as_deref() == Some("xmlns") { continue; }
623                            if attr.name.local_name.starts_with("xmlns") { continue; }
624                            if attr.name.prefix.as_deref() == Some("rdf")
625                                || attr.name.namespace.as_deref() == Some(rdf_ns) { continue; }
626                            if attr.name.prefix.as_deref() == Some("et")
627                                || attr.name.namespace.as_deref() == Some("http://ns.exiftool.org/1.0/")
628                                || attr.name.namespace.as_deref() == Some("http://ns.exiftool.ca/1.0/") { continue; }
629                            if attr.name.prefix.as_deref() == Some("xml")
630                                || attr.name.namespace.as_deref() == Some("http://www.w3.org/XML/1998/namespace") { continue; }
631
632                            let attr_ns = attr.name.namespace.as_deref().unwrap_or("");
633                            let attr_prefix_resolved = namespace_prefix(attr_ns);
634                            let attr_group = if attr_prefix_resolved.is_empty() {
635                                attr.name.prefix.as_deref().unwrap_or(elem_group)
636                            } else {
637                                attr_prefix_resolved
638                            };
639                            let field_uc = ucfirst(&strip_non_ascii(&attr.name.local_name));
640                            // Build flattened name: elem_flat + field_stripped
641                            let field_stripped = strip_struct_prefix(&elem_flat, &field_uc);
642                            let flat_name_raw = format!("{}{}", elem_flat, field_stripped);
643                            let flat_name = apply_flat_name_remap(&flat_name_raw).to_string();
644                            let category = namespace_category(attr_group);
645                            let pv = attr.value.clone();
646                            tags.push(Tag {
647                                id: TagId::Text(format!("{}:{}", attr_group, flat_name)),
648                                name: flat_name.clone(),
649                                description: flat_name,
650                                group: TagGroup {
651                                    family0: "XMP".into(),
652                                    family1: format!("XMP-{}", elem_group),
653                                    family2: category.into(),
654                                },
655                                raw_value: parse_xmp_value(&attr.value),
656                                print_value: pv,
657                                priority: 0,
658                            });
659                        }
660                    }
661
662                    // Detect GContainer:Directory/rdf:Seq entry
663                    // Path looks like: [..., (GContainer_ns, "Directory"), (rdf_ns, "Seq")]
664                    if name.local_name == "Seq"
665                        && name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
666                    {
667                        // Check if the parent is GContainer:Directory
668                        if let Some(parent) = path.iter().rev().nth(1) {
669                            if parent.1 == "Directory"
670                                && parent.0 == "http://ns.google.com/photos/1.0/container/"
671                            {
672                                in_gcontainer_seq = true;
673                            }
674                        }
675                    }
676
677                    // Inside GContainer Seq: rdf:li starts a struct item
678                    if in_gcontainer_seq
679                        && name.local_name == "li"
680                        && name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
681                    {
682                        in_gcontainer_li = true;
683                        in_rdf_li = true;
684                    } else if name.local_name == "li"
685                        && name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
686                    {
687                        if in_lang_alt {
688                            // Inner rdf:li inside rdf:Alt — track the xml:lang attribute
689                            current_li_lang = attributes.iter()
690                                .find(|a| a.name.local_name == "lang"
691                                    && (a.name.prefix.as_deref() == Some("xml")
692                                        || a.name.namespace.as_deref() == Some("http://www.w3.org/XML/1998/namespace")))
693                                .map(|a| a.value.clone());
694                        }
695                        // Note: outer rdf:li increment happens when it closes (to correctly track which item we're on)
696                        in_rdf_li = true;
697                    }
698
699                    // Detect rdf:Alt
700                    if name.local_name == "Alt"
701                        && name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
702                    {
703                        in_lang_alt = true;
704                        // Check if we're inside an outer rdf:li (Bag item)
705                        // Path ends with: ..., Bag, li, Alt (just pushed)
706                        let depth = path.len();
707                        if depth >= 3 {
708                            let li_elem = &path[depth - 2]; // li (just before Alt)
709                            let bag_elem = &path[depth - 3]; // Bag
710                            if li_elem.1 == "li" && li_elem.0 == "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
711                                && bag_elem.1 == "Bag" && bag_elem.0 == "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
712                            {
713                                lang_alt_in_bag = true;
714                            }
715                        }
716                    }
717
718                    // Inside a GContainer struct li: capture Container:Item attributes
719                    // These are struct fields: Item:Mime, Item:Semantic, Item:Length
720                    if in_gcontainer_li
721                        && name.local_name == "Item"
722                        && name.namespace.as_deref() == Some("http://ns.google.com/photos/1.0/container/")
723                    {
724                        // Collect Item:Mime, Item:Semantic, Item:Length for this li entry
725                        let mut found: std::collections::HashMap<String, String> =
726                            std::collections::HashMap::new();
727                        for attr in &attributes {
728                            if attr.name.namespace.as_deref() == Some("http://ns.google.com/photos/1.0/container/item/") {
729                                let field = ucfirst(&attr.name.local_name);
730                                found.insert(field, attr.value.clone());
731                            }
732                        }
733                        // Accumulate: for each known field present on this Item, push its value.
734                        // Absent fields are skipped (no placeholder), so lists may differ in length.
735                        let known = ["Mime", "Semantic", "Length"];
736                        for k in &known {
737                            if let Some(v) = found.get(*k) {
738                                gcontainer_fields.entry(k.to_string())
739                                    .or_default()
740                                    .push(v.clone());
741                            }
742                        }
743                    }
744                }
745                Ok(XmlEvent::Characters(text)) | Ok(XmlEvent::CData(text)) => {
746                    current_text.push_str(&text);
747                }
748                Ok(XmlEvent::EndElement { name }) => {
749                    let ns_uri = name.namespace.as_deref().unwrap_or("");
750
751                    // Handle rdf:li list items
752                    if name.local_name == "li"
753                        && name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
754                    {
755                        if in_lang_alt {
756                            // Inner rdf:li inside rdf:Alt — store by lang
757                            let lang = current_li_lang.take().unwrap_or_else(|| "x-default".to_string());
758                            let text = normalize_xml_text(&current_text);
759                            // Present-but-empty → Some(""), absent → padding with None below
760                            let opt_text: Option<String> = Some(text.clone());
761                            if lang_alt_in_bag {
762                                // Bag-of-lang-alt: accumulate per-lang for current bag item
763                                let entry = bag_lang_values.entry(lang.clone()).or_default();
764                                // Pad to bag_item_count (how many outer lis have closed so far)
765                                // bag_item_count is the count of completed outer lis
766                                while entry.len() < bag_item_count {
767                                    entry.push(None);
768                                }
769                                entry.push(opt_text);
770                            } else {
771                                // Simple lang-alt: use list_values for x-default, track others separately
772                                if lang == "x-default" {
773                                    list_values.push(text);
774                                } else {
775                                    // Store non-default lang values with "-lang" suffix
776                                    bag_lang_values.entry(lang).or_default().push(opt_text);
777                                }
778                            }
779                        } else if lang_alt_in_bag && !in_lang_alt {
780                            // Closing an outer rdf:li in bag-of-alt mode
781                            // (the Alt inside it has already closed and set in_lang_alt=false)
782                            // Increment bag_item_count to mark this item as complete
783                            bag_item_count += 1;
784                            // list_values not used in bag-of-alt mode
785                        } else if in_gcontainer_li {
786                            // GContainer struct li: fields were captured as attributes, not text
787                            in_gcontainer_li = false;
788                        } else if !normalize_xml_text(&current_text).is_empty() {
789                            list_values.push(normalize_xml_text(&current_text));
790                        }
791                        in_rdf_li = false;
792                        path.pop();
793                        current_text.clear();
794                        continue;
795                    }
796
797                    // When we close a Seq/Bag/Alt, emit the collected list
798                    if (name.local_name == "Seq"
799                        || name.local_name == "Bag"
800                        || name.local_name == "Alt")
801                        && name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
802                    {
803                        // Handle closing rdf:Alt (simple lang-alt or end of inner Alt in bag-of-alt)
804                        if name.local_name == "Alt" {
805                            in_lang_alt = false;
806                            // If this Alt was inside a Bag li, don't emit yet — wait for Bag close
807                            if lang_alt_in_bag {
808                                // Reset for next Alt item — the outer li close will be next
809                                path.pop();
810                                current_text.clear();
811                                continue;
812                            }
813                            // Simple lang-alt (not in bag): emit tag with x-default as main value
814                            // and per-lang variants
815                            if let Some(parent) = path.iter().rev().nth(1) {
816                                let prefix = namespace_prefix(&parent.0);
817                                let tag_name = parent.1.clone();
818                                let group_prefix = if prefix.is_empty() { "XMP" } else { prefix };
819                                let category = namespace_category(group_prefix);
820
821                                // Check if this lang-alt field is inside a struct context.
822                                // Either path[rev(2)] == rdf:li (e.g., CvTermName inside li)
823                                // or we're inside any rdf:parseType="Resource" struct.
824                                let rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
825                                let in_struct_li_alt = !parse_resource_depths.is_empty()
826                                    || path.iter().rev().nth(2)
827                                        .map(|(ns, ln)| ln == "li" && ns == rdf_ns)
828                                        .unwrap_or(false);
829                                let (full_tag_name, emit_group_prefix, emit_category) = if in_struct_li_alt {
830                                    // Use full ancestor path prefix (excluding current tag_name)
831                                    // path currently has: ..., struct_ancestors..., li, tag_name, Alt
832                                    // We want the prefix from ancestors before tag_name
833                                    let ancestor_prefix = build_struct_tag_prefix_without_last(&path, &tag_name);
834                                    let field_uc = ucfirst(&strip_non_ascii(&tag_name));
835                                    if !ancestor_prefix.is_empty() {
836                                        let stripped = strip_struct_prefix(&ancestor_prefix, &field_uc);
837                                        let flat_raw = format!("{}{}", ancestor_prefix, stripped);
838                                        let flat = apply_flat_name_remap(&flat_raw).to_string();
839                                        // Find namespace from struct parent
840                                        let sp_gp = path.iter().rev()
841                                            .skip(2)
842                                            .skip_while(|(ns, ln)| ln == "li" || ln == "Bag" || ln == "Seq" || ln == "Alt" || ns == rdf_ns)
843                                            .find(|(ns, ln)| ln != "Description" && ns != rdf_ns)
844                                            .map(|(sp_ns, _)| {
845                                                let p = namespace_prefix(sp_ns);
846                                                if p.is_empty() { group_prefix } else { p }
847                                            })
848                                            .unwrap_or(group_prefix);
849                                        let cat = namespace_category(sp_gp);
850                                        (flat, sp_gp.to_string(), cat.to_string())
851                                    } else {
852                                        (ucfirst(&strip_non_ascii(&tag_name)), group_prefix.to_string(), category.to_string())
853                                    }
854                                } else {
855                                    let tn = apply_flat_name_remap(&ucfirst(&strip_non_ascii(&tag_name))).to_string();
856                                    (tn, group_prefix.to_string(), category.to_string())
857                                };
858
859                                // Emit x-default as main tag
860                                // Only emit if there's at least one non-empty value
861                                let has_nonempty = list_values.iter().any(|s| !s.is_empty());
862                                if !list_values.is_empty() && has_nonempty {
863                                    let main_val = if list_values.len() == 1 {
864                                        Value::String(list_values[0].clone())
865                                    } else {
866                                        Value::List(list_values.iter().map(|s| Value::String(s.clone())).collect())
867                                    };
868                                    let pv = main_val.to_display_string();
869                                    tags.push(Tag {
870                                        id: TagId::Text(format!("{}:{}", emit_group_prefix, tag_name)),
871                                        name: full_tag_name.clone(),
872                                        description: full_tag_name.clone(),
873                                        group: TagGroup {
874                                            family0: "XMP".into(),
875                                            family1: format!("XMP-{}", emit_group_prefix),
876                                            family2: emit_category.clone(),
877                                        },
878                                        raw_value: main_val,
879                                        print_value: pv,
880                                        priority: 0,
881                                    });
882                                }
883                                list_values.clear();
884                                // Emit per-lang variants as TagName-lang
885                                let mut lang_keys: Vec<String> = bag_lang_values.keys().cloned().collect();
886                                lang_keys.sort();
887                                for lang in &lang_keys {
888                                    let vals = &bag_lang_values[lang];
889                                    let non_none: Vec<String> = vals.iter()
890                                        .filter_map(|v| v.clone())
891                                        .collect();
892                                    if !non_none.is_empty() {
893                                        let lang_tag = format!("{}-{}", full_tag_name, lang);
894                                        let val = if non_none.len() == 1 {
895                                            Value::String(non_none[0].clone())
896                                        } else {
897                                            Value::List(non_none.iter().map(|s| Value::String(s.clone())).collect())
898                                        };
899                                        let pv = val.to_display_string();
900                                        tags.push(Tag {
901                                            id: TagId::Text(format!("{}-{}:{}", emit_group_prefix, lang, tag_name)),
902                                            name: lang_tag.clone(),
903                                            description: lang_tag.clone(),
904                                            group: TagGroup {
905                                                family0: "XMP".into(),
906                                                family1: format!("XMP-{}", emit_group_prefix),
907                                                family2: emit_category.clone(),
908                                            },
909                                            raw_value: val,
910                                            print_value: pv,
911                                            priority: 0,
912                                        });
913                                    }
914                                }
915                                bag_lang_values.clear();
916                            }
917                            path.pop();
918                            current_text.clear();
919                            continue;
920                        }
921
922                        // Handle closing rdf:Bag when it's a Bag-of-lang-alt
923                        if name.local_name == "Bag" && lang_alt_in_bag {
924                            lang_alt_in_bag = false;
925                            bag_item_count = 0;
926                            // Find the parent property name using full ancestor path
927                            if let Some(parent) = path.iter().rev().nth(1) {
928                                let prefix = namespace_prefix(&parent.0);
929                                let tag_name = parent.1.clone();
930                                let group_prefix = if prefix.is_empty() { "XMP" } else { prefix };
931                                let category = namespace_category(group_prefix);
932
933                                // Build full flat name from ancestor path
934                                let ancestor_prefix = build_struct_tag_prefix_without_last(&path, &tag_name);
935                                let field_uc = ucfirst(&strip_non_ascii(&tag_name));
936                                let full_flat_base = if !ancestor_prefix.is_empty() {
937                                    let stripped = strip_struct_prefix(&ancestor_prefix, &field_uc);
938                                    let raw = format!("{}{}", ancestor_prefix, stripped);
939                                    apply_flat_name_remap(&raw).to_string()
940                                } else {
941                                    apply_flat_name_remap(&field_uc).to_string()
942                                };
943
944                                // Collect all language codes; sorted below with x-default first, the rest lexicographically
945                                let mut lang_keys: Vec<String> = bag_lang_values.keys().cloned().collect();
946                                // Put x-default first
947                                lang_keys.sort_by(|a, b| {
948                                    if a == "x-default" { std::cmp::Ordering::Less }
949                                    else if b == "x-default" { std::cmp::Ordering::Greater }
950                                    else { a.cmp(b) }
951                                });
952
953                                for lang in &lang_keys {
954                                    let vals = &bag_lang_values[lang];
955                                    let is_default = lang == "x-default"; // kept for tag naming below
956
957                                    // None = lang absent for this bag item → skip
958                                    // Some("") = lang present but empty → keep as empty slot
959                                    // Some(s) = lang present with value s → use s
960                                    //
961                                    // For x-default: skip None (absent items shouldn't affect default)
962                                    // For other langs: skip None (absent), keep Some("") (present but empty)
963                                    let joined: String = vals.iter()
964                                        .filter_map(|v| v.as_deref()) // None filtered out
965                                        .collect::<Vec<_>>()
966                                        .join(", ");
967
968                                    // Only emit if there's something meaningful
969                                    let has_content = vals.iter().any(|v| v.is_some());
970                                    if !has_content {
971                                        continue;
972                                    }
973
974                                    let (tag_key, tag_display) = if is_default {
975                                        (full_flat_base.clone(), full_flat_base.clone())
976                                    } else {
977                                        let lt = format!("{}-{}", full_flat_base, lang);
978                                        (lt.clone(), lt)
979                                    };
980
981                                    tags.push(Tag {
982                                        id: TagId::Text(format!("{}:{}", group_prefix, tag_key)),
983                                        name: tag_key.clone(),
984                                        description: tag_display,
985                                        group: TagGroup {
986                                            family0: "XMP".into(),
987                                            family1: format!("XMP-{}", group_prefix),
988                                            family2: category.into(),
989                                        },
990                                        raw_value: Value::String(joined.clone()),
991                                        print_value: joined,
992                                        priority: 0,
993                                    });
994                                }
995                                bag_lang_values.clear();
996                            }
997                            path.pop();
998                            current_text.clear();
999                            continue;
1000                        }
1001
1002                        // If this is the GContainer Seq, emit DirectoryItem* tags
1003                        if in_gcontainer_seq && name.local_name == "Seq" {
1004                            in_gcontainer_seq = false;
1005                            // Emit each field as DirectoryItem{Field}
1006                            for (field, values) in &gcontainer_fields {
1007                                let tag_name = format!("DirectoryItem{}", field);
1008                                let value = if values.len() == 1 {
1009                                    Value::String(values[0].clone())
1010                                } else {
1011                                    Value::List(values.iter().map(|s| Value::String(s.clone())).collect())
1012                                };
1013                                let print_value = value.to_display_string();
1014                                tags.push(Tag {
1015                                    id: TagId::Text(format!("GContainer:{}", tag_name)),
1016                                    name: tag_name.clone(),
1017                                    description: tag_name.clone(),
1018                                    group: TagGroup {
1019                                        family0: "XMP".into(),
1020                                        family1: "XMP-GContainer".into(),
1021                                        family2: "Image".into(),
1022                                    },
1023                                    raw_value: value,
1024                                    print_value,
1025                                    priority: 0,
1026                                });
1027                            }
1028                            gcontainer_fields.clear();
1029                        } else if !list_values.is_empty() {
1030                            // The parent element is the actual property
1031                            if let Some(parent) = path.iter().rev().nth(1) {
1032                                let prefix = namespace_prefix(&parent.0);
1033                                let tag_name = &parent.1;
1034                                // Skip RDF structural elements (RDF, Description, etc.)
1035                                if tag_name == "RDF" || tag_name == "xmpmeta" || tag_name == "xapmeta"
1036                                    || parent.0 == "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
1037                                    || parent.0 == "adobe:ns:meta/"
1038                                {
1039                                    list_values.clear();
1040                                    path.pop();
1041                                    current_text.clear();
1042                                    continue;
1043                                }
1044                                let group_prefix =
1045                                    if prefix.is_empty() { "XMP" } else { prefix };
1046                                let _category = namespace_category(group_prefix);
1047
1048                                let value = if list_values.len() == 1 {
1049                                    Value::String(list_values[0].clone())
1050                                } else {
1051                                    Value::List(
1052                                        list_values
1053                                            .iter()
1054                                            .map(|s| Value::String(s.clone()))
1055                                            .collect(),
1056                                    )
1057                                };
1058
1059                                // Use full ancestor path for struct flattening
1060                                let ancestor_prefix = build_struct_tag_prefix_without_last(&path, tag_name);
1061                                let field_uc = ucfirst(&strip_non_ascii(tag_name));
1062                                let (full_name, emit_group_prefix) = if !ancestor_prefix.is_empty() {
1063                                    let field_stripped = strip_struct_prefix(&ancestor_prefix, &field_uc);
1064                                    let raw = format!("{}{}", ancestor_prefix, field_stripped);
1065                                    let flat = apply_flat_name_remap(&raw).to_string();
1066                                    // Find namespace from the outermost struct ancestor
1067                                    let sp_gp = path.iter().rev()
1068                                        .skip(1) // skip current list element (Seq/Bag/Alt)
1069                                        .skip(1) // skip tag_name
1070                                        .skip_while(|(ns, ln)| ln == "li" || ln == "Bag" || ln == "Seq" || ln == "Alt" || ns == "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
1071                                        .find(|(ns, ln)| ln != "Description" && ns != "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
1072                                        .map(|(sp_ns, _)| {
1073                                            let p = namespace_prefix(sp_ns);
1074                                            if p.is_empty() { group_prefix } else { p }
1075                                        })
1076                                        .unwrap_or(group_prefix);
1077                                    (flat, sp_gp.to_string())
1078                                } else {
1079                                    let flat = apply_flat_name_remap(&field_uc).to_string();
1080                                    (flat, group_prefix.to_string())
1081                                };
1082
1083                                let emit_cat = namespace_category(&emit_group_prefix);
1084                                let print_value = value.to_display_string();
1085
1086                                tags.push(Tag {
1087                                    id: TagId::Text(format!("{}:{}", emit_group_prefix, tag_name)),
1088                                    name: full_name.clone(),
1089                                    description: full_name,
1090                                    group: TagGroup {
1091                                        family0: "XMP".to_string(),
1092                                        family1: format!("XMP-{}", emit_group_prefix),
1093                                        family2: emit_cat.to_string(),
1094                                    },
1095                                    raw_value: value,
1096                                    print_value,
1097                                    priority: 0,
1098                                });
1099                            }
1100                            list_values.clear();
1101                        }
1102                        path.pop();
1103                        current_text.clear();
1104                        continue;
1105                    }
1106
1107                    // Struct properties inside rdf:li (e.g., stJob:name inside xmpBJ:JobRef/Bag/li)
1108                    // Perl flattens as "{FullPathPrefix}{FieldName}" → "JobRefName"
1109                    if !normalize_xml_text(&current_text).is_empty() && in_rdf_li
1110                        && name.namespace.as_deref() != Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
1111                        && name.local_name != "Description"
1112                    {
1113                        // Build the full ancestor prefix for struct flattening,
1114                        // excluding the current element (which is the field itself).
1115                        let ancestor_prefix = build_struct_tag_prefix_without_last(&path, &name.local_name);
1116                        if !ancestor_prefix.is_empty() {
1117                            let field_local = ucfirst(&strip_non_ascii(&name.local_name));
1118                            let field_stripped = strip_struct_prefix(&ancestor_prefix, &field_local);
1119                            let flat_name_raw = format!("{}{}", ancestor_prefix, field_stripped);
1120                            let flat_name = apply_flat_name_remap(&flat_name_raw).to_string();
1121                            let prefix = namespace_prefix(ns_uri);
1122                            let group_prefix = if prefix.is_empty() { "XMP" } else { prefix };
1123                            let category = namespace_category(group_prefix);
1124                            tags.push(Tag {
1125                                id: TagId::Text(format!("{}:{}", group_prefix, flat_name)),
1126                                name: flat_name.clone(), description: flat_name,
1127                                group: TagGroup { family0: "XMP".into(), family1: format!("XMP-{}", group_prefix), family2: category.into() },
1128                                raw_value: parse_xmp_value(&normalize_xml_text(&current_text)),
1129                                print_value: normalize_xml_text(&current_text), priority: 0,
1130                            });
1131                        }
1132                        path.pop();
1133                        current_text.clear();
1134                        continue;
1135                    }
1136
1137                    // Simple property with text content (or explicitly empty with et:id)
1138                    // Skip emission when inside a top-level blank-node Description (suppress_direct_emit_depth).
1139                    let has_et_depth = emit_empty_depths.contains(&path.len());
1140                    let in_suppressed_blank_node = suppress_direct_emit_depth
1141                        .map(|d| path.len() > d)
1142                        .unwrap_or(false);
1143                    if (!normalize_xml_text(&current_text).is_empty() || has_et_depth) && !in_rdf_li && !in_suppressed_blank_node {
1144                        let prefix = namespace_prefix(ns_uri);
1145                        let tag_name = &name.local_name;
1146
1147                        // Skip RDF structural elements
1148                        if tag_name != "Description"
1149                            && name.namespace.as_deref()
1150                                != Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
1151                        {
1152                            let group_prefix = if prefix.is_empty() { "XMP" } else { prefix };
1153                            let category = namespace_category(group_prefix);
1154
1155                            let text_val = normalize_xml_text(&current_text);
1156                            let value = parse_xmp_value(&text_val);
1157                            let print_value = value.to_display_string();
1158
1159                            // Build full ancestor path for struct flattening.
1160                            // Applies when inside rdf:parseType="Resource" structs OR when
1161                            // nested inside a property element (e.g., blank-node Description).
1162                            let remapped = remap_xmp_tag_name(group_prefix, tag_name);
1163                            let ancestor_prefix = build_struct_tag_prefix_without_last(&path, tag_name);
1164                            let full_name = if !ancestor_prefix.is_empty() {
1165                                let field_stripped = strip_struct_prefix(&ancestor_prefix, &remapped);
1166                                let candidate = format!("{}{}", ancestor_prefix, field_stripped);
1167                                apply_flat_name_remap(&candidate).to_string()
1168                            } else {
1169                                apply_flat_name_remap(&remapped).to_string()
1170                            };
1171
1172                            tags.push(Tag {
1173                                id: TagId::Text(format!("{}:{}", group_prefix, tag_name)),
1174                                name: full_name.clone(),
1175                                description: full_name,
1176                                group: TagGroup {
1177                                    family0: "XMP".to_string(),
1178                                    family1: format!("XMP-{}", group_prefix),
1179                                    family2: category.to_string(),
1180                                },
1181                                raw_value: value,
1182                                print_value,
1183                                priority: 0,
1184                            });
1185                        }
1186                    }
1187
1188                    // Pop parse_resource_depths if we're leaving that element
1189                    if parse_resource_depths.last() == Some(&path.len()) {
1190                        parse_resource_depths.pop();
1191                    }
1192                    emit_empty_depths.remove(&path.len());
1193
1194                    // If closing an inline blank-node Description, emit ALL blank node properties
1195                    // prefixed with the parent property element's name.
1196                    // Also clear suppress_direct_emit_depth when the top-level nodeID Description closes.
1197                    if name.local_name == "Description"
1198                        && name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
1199                    {
1200                        // Clear suppression if this Description was the suppressed one
1201                        if suppress_direct_emit_depth == Some(path.len()) {
1202                            suppress_direct_emit_depth = None;
1203                        }
1204
1205                        if let Some((node_id, parent_local)) = inline_blank_node_stack.last().cloned() {
1206                            // Check this Description is closing (path should end with Description)
1207                            if path.last().map(|(_, ln)| ln == "Description").unwrap_or(false) {
1208                                inline_blank_node_stack.pop();
1209                                if let Some(bn_props) = blank_node_props.get(&node_id) {
1210                                    let parent_uc = ucfirst(&strip_non_ascii(&parent_local));
1211                                    // Find the parent element's namespace to get group prefix
1212                                    let parent_ns = path.iter().rev().nth(1)
1213                                        .map(|(ns, _)| ns.as_str())
1214                                        .unwrap_or("");
1215                                    let parent_prefix_ns = namespace_prefix(parent_ns);
1216                                    let parent_group = if parent_prefix_ns.is_empty() {
1217                                        "XMP"
1218                                    } else { parent_prefix_ns };
1219                                    for (prop_ns, prop_local, prop_val) in bn_props {
1220                                        let prop_prefix = namespace_prefix(prop_ns);
1221                                        let prop_group = if prop_prefix.is_empty() { parent_group } else { prop_prefix };
1222                                        let prop_cat = namespace_category(prop_group);
1223                                        let prop_uc = ucfirst(&strip_non_ascii(prop_local));
1224                                        let stripped = strip_struct_prefix(&parent_uc, &prop_uc);
1225                                        let flat_raw = format!("{}{}", parent_uc, stripped);
1226                                        let flat = apply_flat_name_remap(&flat_raw).to_string();
1227                                        // Avoid duplicates (e.g., properties already emitted inline)
1228                                        if !tags.iter().any(|t| t.name == flat) {
1229                                            tags.push(Tag {
1230                                                id: TagId::Text(format!("{}:{}", prop_group, flat)),
1231                                                name: flat.clone(),
1232                                                description: flat,
1233                                                group: TagGroup {
1234                                                    family0: "XMP".into(),
1235                                                    family1: format!("XMP-{}", prop_group),
1236                                                    family2: prop_cat.into(),
1237                                                },
1238                                                raw_value: Value::String(prop_val.clone()),
1239                                                print_value: prop_val.clone(),
1240                                                priority: 0,
1241                                            });
1242                                        }
1243                                    }
1244                                }
1245                            }
1246                        }
1247                    }
1248
1249                    path.pop();
1250                    current_text.clear();
1251                }
1252                Err(_) => continue,
1253                _ => {}
1254            }
1255        }
1256
1257        // Post-processing: emit GainMap Warning if DirectoryItemSemantic contains "GainMap"
1258        let has_gainmap = tags.iter().any(|t| {
1259            t.name == "DirectoryItemSemantic"
1260                && t.print_value.contains("GainMap")
1261        });
1262        if has_gainmap {
1263            // Find DirectoryItemMime and DirectoryItemLength for the GainMap entry
1264            // Emit warning about GainMap image/jpeg not found in trailer
1265            let gainmap_mime = tags.iter()
1266                .find(|t| t.name == "DirectoryItemSemantic")
1267                .and_then(|t| {
1268                    // Find the semantic that is GainMap and get the corresponding Mime
1269                    // For simplicity, look for GainMap in the values
1270                    if let Value::List(ref items) = t.raw_value {
1271                        items.iter().enumerate()
1272                            .find(|(_, v)| v.to_display_string() == "GainMap")
1273                            .map(|(i, _)| i)
1274                    } else {
1275                        None
1276                    }
1277                })
1278                .and_then(|idx| {
1279                    tags.iter()
1280                        .find(|t| t.name == "DirectoryItemMime")
1281                        .and_then(|t| match &t.raw_value {
1282                            Value::List(items) => items.get(idx).map(|v| v.to_display_string()),
1283                            Value::String(s) => if idx == 0 { Some(s.clone()) } else { None },
1284                            _ => None,
1285                        })
1286                })
1287                .unwrap_or_else(|| "image/jpeg".to_string());
1288
1289            let warning_msg = format!(
1290                "[minor] Error reading GainMap {} from trailer",
1291                gainmap_mime
1292            );
1293            tags.push(Tag {
1294                id: TagId::Text("Warning".into()),
1295                name: "Warning".into(),
1296                description: "Warning".into(),
1297                group: TagGroup {
1298                    family0: "ExifTool".into(),
1299                    family1: "ExifTool".into(),
1300                    family2: "Other".into(),
1301                },
1302                raw_value: Value::String(warning_msg.clone()),
1303                print_value: warning_msg,
1304                priority: 0,
1305            });
1306        }
1307
1308        // Post-processing: compute Flash composite from FlashFired/Return/Mode/Function/RedEyeMode
1309        // This mirrors ExifTool's XMP Composite Flash tag.
1310        if !tags.iter().any(|t| t.name == "Flash") {
1311            let get_bool = |name: &str| -> Option<bool> {
1312                tags.iter().find(|t| t.name == name)
1313                    .map(|t| t.print_value.to_lowercase() == "true")
1314            };
1315            let get_int = |name: &str| -> Option<u32> {
1316                tags.iter().find(|t| t.name == name)
1317                    .and_then(|t| t.print_value.parse::<u32>().ok())
1318            };
1319            let flash_fired = get_bool("FlashFired");
1320            let flash_return = get_int("FlashReturn");
1321            let flash_mode = get_int("FlashMode");
1322            let flash_function = get_bool("FlashFunction");
1323            let flash_red_eye = get_bool("FlashRedEyeMode");
1324            // Only emit if we have at least one relevant tag
1325            if flash_fired.is_some() || flash_return.is_some() || flash_mode.is_some()
1326                || flash_function.is_some() || flash_red_eye.is_some()
1327            {
1328                let val: u32 =
1329                    (if flash_fired.unwrap_or(false) { 0x01 } else { 0 }) |
1330                    ((flash_return.unwrap_or(0) & 0x03) << 1) |
1331                    ((flash_mode.unwrap_or(0) & 0x03) << 3) |
1332                    (if flash_function.unwrap_or(false) { 0x20 } else { 0 }) |
1333                    (if flash_red_eye.unwrap_or(false) { 0x40 } else { 0 });
1334                let flash_str = flash_numeric_to_string(val);
1335                tags.push(Tag {
1336                    id: TagId::Text("Flash:Flash".into()),
1337                    name: "Flash".into(),
1338                    description: "Flash".into(),
1339                    group: TagGroup {
1340                        family0: "XMP".into(),
1341                        family1: "XMP-exif".into(),
1342                        family2: "Camera".into(),
1343                    },
1344                    raw_value: Value::String(format!("{}", val)),
1345                    print_value: flash_str,
1346                    priority: 0,
1347                });
1348            }
1349        }
1350
1351        // Post-processing: aggregate duplicate tag names (same name, different values)
1352        // into a single tag with comma-joined print_value. This matches ExifTool behavior
1353        // where repeated struct properties (e.g. in Bag/Seq items) are combined into one tag.
1354        // For exact duplicate names, keep only the first instance but join all values.
1355        let tags = aggregate_duplicate_xmp_tags(tags);
1356
1357        Ok(tags)
1358    }
1359}
1360
1361/// Aggregate tags with the same name into a single tag with comma-joined values.
1362/// The first occurrence is kept; subsequent occurrences with the same name have their
1363/// values appended to the first occurrence (comma-separated).
1364fn aggregate_duplicate_xmp_tags(tags: Vec<Tag>) -> Vec<Tag> {
1365    let mut result: Vec<Tag> = Vec::with_capacity(tags.len());
1366    let mut name_to_idx: std::collections::HashMap<String, usize> = std::collections::HashMap::new();
1367
1368    for tag in tags {
1369        if let Some(&idx) = name_to_idx.get(&tag.name) {
1370            // Aggregate: append value to the existing tag
1371            let existing = &mut result[idx];
1372            if existing.print_value != tag.print_value {
1373                existing.print_value = format!("{}, {}", existing.print_value, tag.print_value);
1374            }
1375            // Don't push a new entry
1376        } else {
1377            let idx = result.len();
1378            name_to_idx.insert(tag.name.clone(), idx);
1379            result.push(tag);
1380        }
1381    }
1382    result
1383}
1384
/// Translate a numeric flash value into its ExifTool-style description.
///
/// Values present in the lookup table map to their fixed label; any other
/// value is rendered as `Unknown (0x..)` using lowercase hex padded to at
/// least two digits.
fn flash_numeric_to_string(val: u32) -> String {
    // (flash value, description) pairs.
    const FLASH_LABELS: &[(u32, &str)] = &[
        (0x00, "No Flash"),
        (0x01, "Fired"),
        (0x05, "Fired, Return not detected"),
        (0x07, "Fired, Return detected"),
        (0x08, "On, Did not fire"),
        (0x09, "On, Fired"),
        (0x0d, "On, Return not detected"),
        (0x0f, "On, Return detected"),
        (0x10, "Off, Did not fire"),
        (0x14, "Off, Did not fire, Return not detected"),
        (0x18, "Auto, Did not fire"),
        (0x19, "Auto, Fired"),
        (0x1d, "Auto, Fired, Return not detected"),
        (0x1f, "Auto, Fired, Return detected"),
        (0x20, "No flash function"),
        (0x30, "Off, No flash function"),
        (0x41, "Fired, Red-eye reduction"),
        (0x45, "Fired, Red-eye reduction, Return not detected"),
        (0x47, "Fired, Red-eye reduction, Return detected"),
        (0x49, "On, Red-eye reduction"),
        (0x4d, "On, Red-eye reduction, Return not detected"),
        (0x4f, "On, Red-eye reduction, Return detected"),
        (0x50, "Off, Red-eye reduction"),
        (0x58, "Auto, Did not fire, Red-eye reduction"),
        (0x59, "Auto, Fired, Red-eye reduction"),
        (0x5d, "Auto, Fired, Red-eye reduction, Return not detected"),
        (0x5f, "Auto, Fired, Red-eye reduction, Return detected"),
    ];

    FLASH_LABELS
        .iter()
        .find(|&&(code, _)| code == val)
        .map(|&(_, label)| label.to_string())
        .unwrap_or_else(|| format!("Unknown (0x{:02x})", val))
}
1418
1419
1420/// Like build_struct_tag_prefix but excludes the element with the given local name
1421/// (used when path already includes the current element but we want the prefix without it).
1422fn build_struct_tag_prefix_without_last(path: &[(String, String)], exclude_ln: &str) -> String {
1423    build_struct_tag_prefix_excluding(path, Some(exclude_ln))
1424}
1425
1426fn build_struct_tag_prefix_excluding(path: &[(String, String)], exclude_last: Option<&str>) -> String {
1427    let rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
1428    let mut result = String::new();
1429    let effective_path: &[(String, String)] = if let Some(excl) = exclude_last {
1430        // Find the last occurrence of excl in path and trim there
1431        let mut end = path.len();
1432        for i in (0..path.len()).rev() {
1433            if path[i].1 == excl {
1434                end = i;
1435                break;
1436            }
1437        }
1438        &path[..end]
1439    } else {
1440        path
1441    };
1442    for (ns, ln) in effective_path {
1443        if ns == rdf_ns || ln == "Description" || ln == "RDF"
1444            || ln == "xmpmeta" || ln == "xapmeta"
1445            || ns == "adobe:ns:meta/"
1446        {
1447            continue;
1448        }
1449        // Skip rdf list/struct elements
1450        if ln == "Seq" || ln == "Bag" || ln == "Alt" || ln == "li" {
1451            continue;
1452        }
1453        let part = ucfirst(&strip_non_ascii(ln));
1454        if result.is_empty() {
1455            result = part;
1456        } else {
1457            // Strip prefix overlap between result suffix and part
1458            let stripped = strip_struct_prefix(&result, &part);
1459            result = format!("{}{}", result, stripped);
1460        }
1461    }
1462    result
1463}
1464
/// Remove from `field` the leading part that merely repeats (the tail of) `parent`.
///
/// Two attempts are made, in order:
///
/// 1. If `field` starts with the whole of `parent` and something is left over,
///    the leftover is returned
///    (`"CopyrightOwner"`, `"CopyrightOwnerName"` → `"Name"`).
/// 2. Otherwise every suffix of `parent` that begins on an uppercase character
///    is tried, longest first. The first and last characters of `parent` are
///    never used as a starting point, so a suffix is at least two characters
///    long. The first suffix that prefixes `field` with a non-empty remainder
///    wins (`"AboutCvTerm"`, `"CvTermName"` → `"Name"`).
///
/// When neither attempt succeeds, `field` is returned unchanged.
fn strip_struct_prefix(parent: &str, field: &str) -> String {
    // Attempt 1: the full parent name is a proper prefix of the field.
    if let Some(rest) = field.strip_prefix(parent).filter(|rest| !rest.is_empty()) {
        return rest.to_owned();
    }

    // Attempt 2: suffixes of `parent` starting at an interior uppercase character.
    let without_last = parent.chars().count().saturating_sub(1);
    parent
        .char_indices()
        .take(without_last) // never start at the final character
        .skip(1) // never start at the first character
        .filter(|&(_, ch)| ch.is_uppercase())
        .find_map(|(offset, _)| {
            field
                .strip_prefix(&parent[offset..])
                .filter(|rest| !rest.is_empty())
        })
        .map(str::to_owned)
        .unwrap_or_else(|| field.to_owned())
}
1498
1499/// Remap XMP tag names where ExifTool uses a different Name than the XMP local name.
1500fn remap_xmp_tag_name(group_prefix: &str, local_name: &str) -> String {
1501    // First strip non-ASCII characters from the local name (mirrors Perl byte-mode behavior)
1502    let clean_name = strip_non_ascii(local_name);
1503    let local_name = clean_name.as_str();
1504    match (group_prefix, local_name) {
1505        // tiff: namespace remappings
1506        ("tiff", "ImageLength") => "ImageHeight".into(),
1507        ("tiff", "BitsPerSample") => "BitsPerSample".into(),
1508        // exif: namespace remappings
1509        ("exif", "PixelXDimension") => "ExifImageWidth".into(),
1510        ("exif", "PixelYDimension") => "ExifImageHeight".into(),
1511        ("exif", "ExposureBiasValue") => "ExposureCompensation".into(),
1512        // photoshop: namespace remappings
1513        ("photoshop", "ICCProfile") => "ICCProfileName".into(),
1514        ("photoshop", "ColorMode") => "ColorMode".into(),
1515        // plus: namespace remappings
1516        ("plus", "Version") => "PLUSVersion".into(),
1517        _ => {
1518            // For unknown/ExifTool-internal namespaces (group_prefix = "XMP" or anything unknown),
1519            // if the local name has only uppercase letters (e.g. "ISO"), ExifTool normalizes it:
1520            // "ISO" → lowercase → ucfirst → "Iso"
1521            // Mixed-case names are kept as-is with ucfirst.
1522            let has_lowercase = local_name.chars().any(|c| c.is_lowercase());
1523            let has_uppercase = local_name.chars().any(|c| c.is_uppercase());
1524            if has_uppercase && !has_lowercase && local_name.len() > 1 {
1525                ucfirst(&local_name.to_lowercase())
1526            } else {
1527                ucfirst(local_name)
1528            }
1529        }
1530    }
1531}
1532
/// Translate a concatenated struct property path into the flat tag name
/// ExifTool uses for it.
///
/// Examples: `ArtworkOrObjectAOTitle` → `ArtworkTitle`,
/// `KeywordsHierarchyKeyword` → `HierarchicalKeywords1`.
///
/// A leading `RegionsRegionListExtensions` is rewritten to `RegionExtensions`
/// and the remainder of the name is remapped recursively. Names without a
/// table entry are returned unchanged.
fn apply_flat_name_remap(name: &str) -> String {
    /// Exact-match table: (concatenated property path, flat tag name).
    const FLAT_NAMES: &[(&str, &str)] = &[
        // IPTC Extension: ArtworkOrObject struct
        ("ArtworkOrObjectAOCopyrightNotice", "ArtworkCopyrightNotice"),
        ("ArtworkOrObjectAOCreator", "ArtworkCreator"),
        ("ArtworkOrObjectAODateCreated", "ArtworkDateCreated"),
        ("ArtworkOrObjectAOSource", "ArtworkSource"),
        ("ArtworkOrObjectAOSourceInvNo", "ArtworkSourceInventoryNo"),
        ("ArtworkOrObjectAOTitle", "ArtworkTitle"),
        ("ArtworkOrObjectAOCurrentCopyrightOwnerName", "ArtworkCopyrightOwnerName"),
        ("ArtworkOrObjectAOCurrentCopyrightOwnerId", "ArtworkCopyrightOwnerID"),
        ("ArtworkOrObjectAOCurrentLicensorName", "ArtworkLicensorName"),
        ("ArtworkOrObjectAOCurrentLicensorId", "ArtworkLicensorID"),
        ("ArtworkOrObjectAOCreatorId", "ArtworkCreatorID"),
        ("ArtworkOrObjectAOCircaDateCreated", "ArtworkCircaDateCreated"),
        ("ArtworkOrObjectAOStylePeriod", "ArtworkStylePeriod"),
        ("ArtworkOrObjectAOSourceInvURL", "ArtworkSourceInvURL"),
        ("ArtworkOrObjectAOContentDescription", "ArtworkContentDescription"),
        ("ArtworkOrObjectAOContributionDescription", "ArtworkContributionDescription"),
        ("ArtworkOrObjectAOPhysicalDescription", "ArtworkPhysicalDescription"),
        // MWG Regions: Regions struct (property path → Name)
        ("RegionsRegionListName", "RegionName"),
        ("RegionsRegionListType", "RegionType"),
        ("RegionsRegionListDescription", "RegionDescription"),
        ("RegionsRegionListFocusUsage", "RegionFocusUsage"),
        ("RegionsRegionListBarCodeValue", "RegionBarCodeValue"),
        ("RegionsRegionListSeeAlso", "RegionSeeAlso"),
        ("RegionsRegionListRotation", "RegionRotation"),
        ("RegionsRegionListAreaH", "RegionAreaH"),
        ("RegionsRegionListAreaW", "RegionAreaW"),
        ("RegionsRegionListAreaX", "RegionAreaX"),
        ("RegionsRegionListAreaY", "RegionAreaY"),
        ("RegionsRegionListAreaD", "RegionAreaD"),
        ("RegionsRegionListAreaUnit", "RegionAreaUnit"),
        // MWG Keywords: one entry per nesting depth of the Keywords struct
        ("KeywordsHierarchyKeyword", "HierarchicalKeywords1"),
        ("KeywordsHierarchyChildrenKeyword", "HierarchicalKeywords2"),
        ("KeywordsHierarchyChildrenChildrenKeyword", "HierarchicalKeywords3"),
        ("KeywordsHierarchyChildrenChildrenChildrenKeyword", "HierarchicalKeywords4"),
        ("KeywordsHierarchyChildrenChildrenChildrenChildrenKeyword", "HierarchicalKeywords5"),
        ("KeywordsHierarchyChildrenChildrenChildrenChildrenChildrenKeyword", "HierarchicalKeywords6"),
        // xmpTPg: Colorants struct (FlatName => 'Colorant')
        ("ColorantsSwatchName", "ColorantSwatchName"),
        ("ColorantsMode", "ColorantMode"),
        ("ColorantsType", "ColorantType"),
        ("ColorantsCyan", "ColorantCyan"),
        ("ColorantsMagenta", "ColorantMagenta"),
        ("ColorantsYellow", "ColorantYellow"),
        ("ColorantsBlack", "ColorantBlack"),
        ("ColorantsGray", "ColorantGray"),
        ("ColorantsRed", "ColorantRed"),
        ("ColorantsGreen", "ColorantGreen"),
        ("ColorantsBlue", "ColorantBlue"),
        ("ColorantsL", "ColorantL"),
        ("ColorantsA", "ColorantA"),
        ("ColorantsB", "ColorantB"),
        // xmpTPg: Fonts struct (FlatName => '' → fields stand alone)
        ("FontsFontName", "FontName"),
        ("FontsFontFamily", "FontFamily"),
        ("FontsFontFace", "FontFace"),
        ("FontsFontType", "FontType"),
        ("FontsVersionString", "FontVersion"),
        ("FontsComposite", "FontComposite"),
        ("FontsFontFileName", "FontFileName"),
        // xmp: Thumbnails struct
        ("ThumbnailsFormat", "ThumbnailFormat"),
        ("ThumbnailsWidth", "ThumbnailWidth"),
        ("ThumbnailsHeight", "ThumbnailHeight"),
        ("ThumbnailsImage", "ThumbnailImage"),
    ];

    // The MWG Regions "Extensions" prefix is open-ended, so it cannot live in
    // the exact-match table: swap the prefix and remap whatever follows it.
    if let Some(suffix) = name.strip_prefix("RegionsRegionListExtensions") {
        return format!("RegionExtensions{}", apply_flat_name_remap(suffix));
    }

    FLAT_NAMES
        .iter()
        .find(|(path, _)| *path == name)
        .map_or(name, |(_, flat)| *flat)
        .to_string()
}
1619
1620/// Parse an XMP text value into the appropriate Value type.
1621/// XMP rational values (e.g., "28/10", "5800/1000") are stored as Value::URational
1622/// so that composite computation can parse them as f64.
1623fn parse_xmp_value(text: &str) -> Value {
1624    // Try rational: N/D where N and D are integers (no whitespace)
1625    if let Some(slash) = text.find('/') {
1626        let num_str = &text[..slash];
1627        let den_str = &text[slash+1..];
1628        if !num_str.is_empty() && !den_str.is_empty()
1629            && !num_str.contains(' ') && !den_str.contains(' ')
1630        {
1631            if let (Ok(n), Ok(d)) = (num_str.parse::<i64>(), den_str.parse::<u64>()) {
1632                if d > 0 {
1633                    if n >= 0 {
1634                        return Value::URational(n as u32, d as u32);
1635                    } else {
1636                        return Value::IRational(n as i32, d as i32);
1637                    }
1638                }
1639            }
1640        }
1641    }
1642    Value::String(text.to_string())
1643}
1644
/// Return `s` with its first character upper-cased and the rest untouched.
/// An empty input yields an empty string.
fn ucfirst(s: &str) -> String {
    let mut rest = s.chars();
    if let Some(first) = rest.next() {
        // Upper-casing one char may produce several (e.g. 'ß' → "SS").
        let mut out = String::with_capacity(s.len());
        out.extend(first.to_uppercase());
        out.push_str(rest.as_str());
        out
    } else {
        String::new()
    }
}
1652
/// Drop every non-ASCII character from a tag name component.
///
/// Perl's XMP.pm works on byte strings, where a tag name taken from XML may
/// carry raw UTF-8 bytes (e.g. U+2182 as \xe2\x86\x82); bytes above 0x7F are
/// stripped when the ASCII tag name is built. Removing the non-ASCII Unicode
/// characters here mirrors that behavior.
fn strip_non_ascii(s: &str) -> String {
    let mut ascii_only = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch.is_ascii() {
            ascii_only.push(ch);
        }
    }
    ascii_only
}
1661
/// Turn an XML element name into an ExifTool-style CamelCase tag name.
///
/// Modeled on Perl's `my $name = ucfirst lc $tag; $name =~ s/_(.)/\U$1/g;`:
/// `IMAGE_CREATION` → `ImageCreation`,
/// `GENERAL_CREATION_INFO` → `GeneralCreationInfo`.
///
/// Names that contain an underscore, or whose letters are all upper-case,
/// are lower-cased and rebuilt with each underscore-separated segment
/// capitalized (underscores are dropped). Any other name only has its first
/// character upper-cased.
fn xml_elem_to_camel(s: &str) -> String {
    let has_non_upper_letter = s.chars().any(|c| c.is_alphabetic() && !c.is_uppercase());

    if !s.contains('_') && has_non_upper_letter {
        // camelCase or lowercase: keep the inner casing, capitalize the front
        let mut tail = s.chars();
        return match tail.next() {
            Some(head) => {
                let mut out: String = head.to_uppercase().collect();
                out.push_str(tail.as_str());
                out
            }
            None => String::new(),
        };
    }

    // snake_case / ALL_CAPS: lower-case everything, then capitalize the first
    // character of every segment. Empty segments (from leading, trailing or
    // doubled underscores) contribute nothing.
    let lowered = s.to_lowercase();
    let mut out = String::with_capacity(lowered.len());
    for segment in lowered.split('_') {
        let mut tail = segment.chars();
        if let Some(head) = tail.next() {
            out.extend(head.to_uppercase());
            out.push_str(tail.as_str());
        }
    }
    out
}
1694
/// Normalize XML text content.
///
/// Leading and trailing whitespace is always trimmed. If the trimmed text
/// contains a line break (`\n` or `\r`), as multi-line XMP text nodes do,
/// every run of whitespace is collapsed to a single space. Text without
/// line breaks keeps its internal whitespace exactly as written.
fn normalize_xml_text(s: &str) -> String {
    let body = s.trim();
    let has_line_break = body.chars().any(|c| c == '\n' || c == '\r');
    if has_line_break {
        // `body` is already trimmed, so splitting on whitespace and joining
        // with one space collapses each internal run without adding padding.
        body.split_whitespace().collect::<Vec<_>>().join(" ")
    } else {
        body.to_string()
    }
}
1720
1721/// Read generic (non-RDF) XML files by building tag names from element paths.
1722/// This mirrors ExifTool's XMP.pm generic XML handling.
1723fn read_generic_xml(xml: &str) -> Result<Vec<Tag>> {
1724    use xml::reader::{EventReader, XmlEvent};
1725    let mut tags = Vec::new();
1726    let mut seen_names: std::collections::HashSet<String> = std::collections::HashSet::new();
1727
1728    let parser = EventReader::from_str(xml);
1729    let mut path: Vec<String> = Vec::new(); // element local names (ucfirst'd)
1730    let mut current_text = String::new();
1731    // Track whether the current element has had any child elements (to detect leaf nodes)
1732    // Each entry corresponds to the matching path depth: true = has children
1733    let mut has_children: Vec<bool> = Vec::new();
1734
1735    // Accumulate full path as tag name prefix: root element name + child names concatenated
1736    // Each path component is ucfirst'd to produce CamelCase tag names (e.g., GpxTrkName).
1737    // Attributes on elements are emitted as TagName = value (path + attrName)
1738
1739    // Track which namespace URIs were declared on the root element (xmlns=...)
1740
1741    for event in parser {
1742        match event {
1743            Ok(XmlEvent::StartElement { name, attributes, namespace, .. }) => {
1744                let local = xml_elem_to_camel(&name.local_name);
1745                let path_str = format!("{}{}", path.join(""), local);
1746                // Mark parent as having a child element
1747                if let Some(last) = has_children.last_mut() {
1748                    *last = true;
1749                }
1750                path.push(local.clone());
1751                has_children.push(false);
1752                current_text.clear();
1753
1754                // Emit default xmlns (xmlns="uri") as {ElemName}Xmlns tag
1755                // xml-rs exposes xmlns in the namespace mappings
1756                // The default namespace (no prefix) is exposed via namespace.get("")
1757                // We look for newly-declared default NS at this element
1758                
1759                // Check if there's a new default namespace declared at this element
1760                // xml-rs merges namespaces so we check the full namespace map
1761                // The simplest approach: check if attributes contain xmlns-like entries
1762                // xml-rs exposes xmlns as regular attribute with prefix="xmlns", local=""
1763                // OR as a namespace mapping
1764                // Actually, in xml-rs the namespace object contains ALL in-scope namespaces.
1765                // We need to detect which ones are NEW at this element.
1766                // The simplest heuristic: only emit xmlns for root element (path depth 1)
1767                if path.len() == 1 {
1768                    // Root element: emit its default xmlns
1769                    if let Some(default_ns) = namespace.get("") {
1770                        // Emit as {RootName}Xmlns = default_ns_uri (CamelCase root name)
1771                        let tag_name = format!("{}Xmlns", local);
1772                        if !seen_names.contains(&tag_name) {
1773                            seen_names.insert(tag_name.clone());
1774                            let val = Value::String(default_ns.to_string());
1775                            let pv = val.to_display_string();
1776                            tags.push(Tag {
1777                                id: TagId::Text(format!("XMP:{}", tag_name)),
1778                                name: tag_name.clone(), description: tag_name,
1779                                group: TagGroup { family0: "XMP".into(), family1: "XMP".into(), family2: "Other".into() },
1780                                raw_value: val, print_value: pv, priority: 0,
1781                            });
1782                        }
1783                    }
1784                }
1785
1786                // Emit attributes as tags (only first occurrence)
1787                for attr in &attributes {
1788                    let aname = &attr.name;
1789                    if aname.prefix.as_deref() == Some("xmlns")
1790                        || aname.local_name == "xmlns"
1791                        || aname.local_name.starts_with("xmlns:")
1792                    {
1793                        // Skip xmlns declarations (handled via namespace above)
1794                        continue;
1795                    }
1796                    // For xsi:schemaLocation → emit as {path}SchemaLocation
1797                    let attr_local = xml_elem_to_camel(&aname.local_name);
1798                    let tag_name = format!("{}{}", path_str, attr_local);
1799                    if !seen_names.contains(&tag_name) {
1800                        seen_names.insert(tag_name.clone());
1801                        // Determine group prefix from namespace
1802                        let attr_ns = aname.namespace.as_deref().unwrap_or("");
1803                        let pfx = namespace_prefix(attr_ns);
1804                        let group_pfx = if pfx.is_empty() {
1805                            aname.prefix.as_deref().unwrap_or("XMP")
1806                        } else { pfx };
1807                        // Normalize attribute value to collapse internal whitespace/newlines
1808                        let attr_val = normalize_xml_text(&attr.value);
1809                        let val = Value::String(attr_val.clone());
1810                        let pv = val.to_display_string();
1811                        tags.push(Tag {
1812                            id: TagId::Text(format!("XMP:{}", tag_name)),
1813                            name: tag_name.clone(), description: tag_name,
1814                            group: TagGroup { family0: "XMP".into(), family1: format!("XMP-{}", group_pfx), family2: "Other".into() },
1815                            raw_value: val, print_value: pv, priority: 0,
1816                        });
1817                    }
1818                }
1819            }
1820            Ok(XmlEvent::Characters(text)) | Ok(XmlEvent::CData(text)) => {
1821                current_text.push_str(&text);
1822            }
1823            Ok(XmlEvent::EndElement { .. }) => {
1824                let text = normalize_xml_text(&current_text);
1825                let is_leaf = has_children.last().copied().unwrap_or(false) == false;
1826                // Emit tag if: has text content OR is a leaf node (no child elements, i.e. empty element)
1827                if (is_leaf || !text.is_empty()) && !path.is_empty() {
1828                    let tag_name = path.join("");
1829                    if !seen_names.contains(&tag_name) {
1830                        seen_names.insert(tag_name.clone());
1831                        let val = Value::String(text.clone());
1832                        let pv = val.to_display_string();
1833                        tags.push(Tag {
1834                            id: TagId::Text(format!("XMP:{}", tag_name)),
1835                            name: tag_name.clone(), description: tag_name,
1836                            group: TagGroup { family0: "XMP".into(), family1: "XMP".into(), family2: "Other".into() },
1837                            raw_value: val, print_value: pv, priority: 0,
1838                        });
1839                    }
1840                }
1841                current_text.clear();
1842                has_children.pop();
1843                path.pop();
1844            }
1845            Err(_) => continue,
1846            _ => {}
1847        }
1848    }
1849    Ok(tags)
1850}
1851
/// Repair malformed XML by dropping closing tags that have no matching
/// opening tag.
///
/// The input is scanned tag by tag while a stack of open element names is
/// maintained:
/// - a closing tag matching the top of the stack is kept and popped;
/// - a closing tag matching a deeper entry is kept, and the stack is unwound
///   down to and including that entry;
/// - a closing tag matching nothing on the stack is removed;
/// - comments, CDATA sections, processing instructions and `<!...>`
///   declarations (e.g. `<!DOCTYPE ...>`) are copied through verbatim and do
///   not affect the stack.
///
/// Tags are delimited by the next `>`, so a literal `>` inside an attribute
/// value is not understood. The delimiters searched for are all ASCII, so
/// every slice boundary falls on a UTF-8 character boundary.
fn fix_malformed_xml(xml: &str) -> String {
    let mut stack: Vec<String> = Vec::new();
    let mut result = String::with_capacity(xml.len());
    let mut pos = 0;

    while pos < xml.len() {
        let lt = match xml[pos..].find('<') {
            Some(rel) => pos + rel,
            None => {
                // No more markup: copy the remaining text.
                result.push_str(&xml[pos..]);
                break;
            }
        };
        // Copy the text preceding this '<'.
        result.push_str(&xml[pos..lt]);
        pos = lt;
        let rest = &xml[pos..];

        if rest.starts_with("</") {
            // Closing tag
            match rest.find('>') {
                Some(gt_rel) => {
                    let tag_name = rest[2..gt_rel].trim();
                    if stack.iter().any(|open| open == tag_name) {
                        // Unwind to the matching opener (usually the top of
                        // the stack) and keep the tag.
                        while let Some(open) = stack.pop() {
                            if open == tag_name {
                                break;
                            }
                        }
                        result.push_str(&rest[..=gt_rel]);
                    }
                    // Otherwise: unmatched closing tag — emit nothing.
                    pos += gt_rel + 1;
                }
                None => {
                    // Unterminated tag: keep the '<' as literal text.
                    result.push('<');
                    pos += 1;
                }
            }
        } else if rest.starts_with("<!") || rest.starts_with("<?") {
            // Comment, CDATA, processing instruction or declaration: copied
            // as-is up to its own terminator.
            let len = if rest.starts_with("<!--") {
                rest.find("-->").map(|e| e + 3)
            } else if rest.starts_with("<![CDATA[") {
                rest.find("]]>").map(|e| e + 3)
            } else if rest.starts_with("<?") {
                rest.find("?>").map(|e| e + 2)
            } else {
                // <!DOCTYPE ...>, <!ENTITY ...> and friends end at '>'.
                // Looking for "?>" here would swallow everything up to the
                // next processing instruction (such as the trailing
                // <?xpacket end="w"?>) without checking the tags in between.
                rest.find('>').map(|e| e + 1)
            };
            match len {
                Some(len) => {
                    result.push_str(&rest[..len]);
                    pos += len;
                }
                None => {
                    result.push('<');
                    pos += 1;
                }
            }
        } else {
            // Opening or self-closing tag
            match rest.find('>') {
                Some(gt_rel) => {
                    let inner = rest[1..gt_rel].trim();
                    if !inner.ends_with('/') {
                        // The element name runs up to the first whitespace
                        // or '/'.
                        let tag_name = inner
                            .split(|c: char| c.is_whitespace() || c == '/')
                            .next()
                            .unwrap_or("");
                        if !tag_name.is_empty() {
                            stack.push(tag_name.to_string());
                        }
                    }
                    result.push_str(&rest[..=gt_rel]);
                    pos += gt_rel + 1;
                }
                None => {
                    result.push('<');
                    pos += 1;
                }
            }
        }
    }
    result
}
1937
/// Sanitize XMP XML by blanking out characters that are not legal in XML when
/// they occur inside a processing instruction (`<?...?>`).
///
/// This covers cases such as `<?xpacket begin="..."?>` carrying a stray
/// control character like 0x1A. Each offending character is replaced with a
/// single space; text outside processing instructions is copied unchanged.
fn sanitize_xmp_xml(xml: &str) -> String {
    let mut out = String::with_capacity(xml.len());
    let mut inside_pi = false;
    let mut stream = xml.chars().peekable();

    while let Some(ch) = stream.next() {
        if !inside_pi {
            // "<?" opens a processing instruction; the '?' itself is handled
            // on the next iteration as PI content.
            if ch == '<' && stream.peek() == Some(&'?') {
                inside_pi = true;
            }
            out.push(ch);
            continue;
        }

        if ch == '?' && stream.peek() == Some(&'>') {
            // "?>" closes the processing instruction.
            stream.next();
            out.push_str("?>");
            inside_pi = false;
            continue;
        }

        // Characters permitted by the XML `Char` production.
        let legal = matches!(
            ch,
            '\t' | '\n'
                | '\r'
                | '\u{20}'..='\u{D7FF}'
                | '\u{E000}'..='\u{FFFD}'
                | '\u{10000}'..='\u{10FFFF}'
        );
        out.push(if legal { ch } else { ' ' });
    }
    out
}
1972
1973/// Pre-pass: collect rdf:nodeID-mapped Bag/Seq values from XMP XML.
1974/// Returns a map from nodeID string → list of rdf:li text values.
1975fn collect_node_bag_values(xml: &str) -> std::collections::HashMap<String, Vec<String>> {
1976    use xml::reader::{EventReader, XmlEvent};
1977    let mut map: std::collections::HashMap<String, Vec<String>> = std::collections::HashMap::new();
1978
1979    let parser = EventReader::from_str(xml);
1980    let mut current_node_id: Option<String> = None;
1981    let mut current_items: Vec<String> = Vec::new();
1982    let mut in_li = false;
1983    let mut current_text = String::new();
1984    let rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
1985
1986    for event in parser {
1987        match event {
1988            Ok(XmlEvent::StartElement { name, attributes, .. }) => {
1989                current_text.clear();
1990                let local = &name.local_name;
1991                let ns = name.namespace.as_deref().unwrap_or("");
1992
1993                if (local == "Bag" || local == "Seq" || local == "Alt") && ns == rdf_ns {
1994                    // Check for rdf:nodeID attribute
1995                    if let Some(nid) = attributes.iter().find(|a| {
1996                        a.name.local_name == "nodeID"
1997                            && (a.name.prefix.as_deref() == Some("rdf") || ns == rdf_ns)
1998                    }) {
1999                        current_node_id = Some(nid.value.clone());
2000                        current_items.clear();
2001                    }
2002                } else if local == "li" && ns == rdf_ns && current_node_id.is_some() {
2003                    in_li = true;
2004                }
2005            }
2006            Ok(XmlEvent::Characters(text)) | Ok(XmlEvent::CData(text)) => {
2007                if in_li {
2008                    current_text.push_str(&text);
2009                }
2010            }
2011            Ok(XmlEvent::EndElement { name }) => {
2012                let local = &name.local_name;
2013                let ns = name.namespace.as_deref().unwrap_or("");
2014                if local == "li" && ns == rdf_ns && in_li {
2015                    let val = normalize_xml_text(&current_text);
2016                    if !val.is_empty() {
2017                        current_items.push(val);
2018                    }
2019                    in_li = false;
2020                    current_text.clear();
2021                } else if (local == "Bag" || local == "Seq" || local == "Alt") && ns == rdf_ns {
2022                    if let Some(nid) = current_node_id.take() {
2023                        map.insert(nid, std::mem::take(&mut current_items));
2024                    }
2025                }
2026            }
2027            Err(_) => continue,
2028            _ => {}
2029        }
2030    }
2031    map
2032}
2033
2034/// Pre-pass: collect nodeIDs that appear inline — i.e., rdf:Description rdf:nodeID="X"
2035/// nested inside a property element (not as a direct child of rdf:RDF or x:xmpmeta).
2036/// These nodeIDs are referenced inline by their parent property, so the top-level
2037/// Description with the same nodeID should NOT emit tags directly.
2038fn collect_inline_referenced_node_ids(xml: &str) -> std::collections::HashSet<String> {
2039    use xml::reader::{EventReader, XmlEvent};
2040    let mut set: std::collections::HashSet<String> = std::collections::HashSet::new();
2041    let parser = EventReader::from_str(xml);
2042    let rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
2043    // Track the stack of (namespace, local_name) for parent context
2044    let mut path: Vec<(String, String)> = Vec::new();
2045
2046    for event in parser {
2047        match event {
2048            Ok(XmlEvent::StartElement { name, attributes, .. }) => {
2049                let local = name.local_name.clone();
2050                let ns = name.namespace.as_deref().unwrap_or("").to_string();
2051
2052                // Check if this is rdf:Description with rdf:nodeID
2053                if local == "Description" && ns == rdf_ns {
2054                    if let Some(nid_attr) = attributes.iter().find(|a| {
2055                        a.name.local_name == "nodeID"
2056                            && (a.name.prefix.as_deref() == Some("rdf")
2057                                || a.name.namespace.as_deref() == Some(rdf_ns))
2058                    }) {
2059                        // Determine if this Description is inline-referenced:
2060                        // it must have a non-RDF, non-xmpmeta, non-RDF-container parent.
2061                        // Direct children of rdf:RDF, x:xmpmeta, x:xapmeta are top-level.
2062                        let parent_is_top_level = path.last().map(|(pns, pln)| {
2063                            (pln == "RDF" && pns == rdf_ns)
2064                                || pln == "xmpmeta"
2065                                || pln == "xapmeta"
2066                        }).unwrap_or(true);
2067
2068                        if !parent_is_top_level {
2069                            set.insert(nid_attr.value.clone());
2070                        }
2071                    }
2072                }
2073
2074                path.push((ns, local));
2075            }
2076            Ok(XmlEvent::EndElement { .. }) => {
2077                path.pop();
2078            }
2079            Err(_) => continue,
2080            _ => {}
2081        }
2082    }
2083    set
2084}
2085
2086/// Pre-pass: collect ALL properties of blank nodes (rdf:Description with rdf:nodeID).
2087/// Returns a map from nodeID → Vec<(namespace_uri, local_name, value)>.
2088/// Handles attributes on Description, child elements with text, and rdf:resource attributes.
2089/// Multiple Descriptions with the same nodeID are merged.
2090fn collect_blank_node_properties(xml: &str) -> std::collections::HashMap<String, Vec<(String, String, String)>> {
2091    use xml::reader::{EventReader, XmlEvent};
2092    let mut map: std::collections::HashMap<String, Vec<(String, String, String)>> = std::collections::HashMap::new();
2093    let parser = EventReader::from_str(xml);
2094    let mut current_node_id: Option<String> = None;
2095    let mut current_text = String::new();
2096    let mut in_property = false;
2097    let mut current_prop_ns = String::new();
2098    let mut current_prop_local = String::new();
2099    let rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
2100
2101    for event in parser {
2102        match event {
2103            Ok(XmlEvent::StartElement { name, attributes, .. }) => {
2104                current_text.clear();
2105                let local = &name.local_name;
2106                let ns = name.namespace.as_deref().unwrap_or("");
2107
2108                if local == "Description" && ns == rdf_ns {
2109                    // Check for rdf:nodeID
2110                    if let Some(nid_attr) = attributes.iter().find(|a| {
2111                        a.name.local_name == "nodeID"
2112                            && (a.name.prefix.as_deref() == Some("rdf") || a.name.namespace.as_deref() == Some(rdf_ns))
2113                    }) {
2114                        let nid = nid_attr.value.clone();
2115                        current_node_id = Some(nid.clone());
2116                        // Collect attribute properties (non-xmlns, non-rdf, non-about)
2117                        let entry = map.entry(nid).or_default();
2118                        for attr in &attributes {
2119                            if attr.name.local_name == "nodeID" || attr.name.local_name == "about" { continue; }
2120                            if attr.name.prefix.as_deref() == Some("xmlns") { continue; }
2121                            if attr.name.local_name.starts_with("xmlns") { continue; }
2122                            if attr.name.prefix.as_deref() == Some("rdf") || attr.name.namespace.as_deref() == Some(rdf_ns) { continue; }
2123                            let attr_ns = attr.name.namespace.as_deref().unwrap_or("").to_string();
2124                            entry.push((attr_ns, attr.name.local_name.clone(), attr.value.clone()));
2125                        }
2126                    }
2127                } else if current_node_id.is_some() && ns != rdf_ns && local != "RDF" {
2128                    // Property child element inside a blank node Description
2129                    in_property = true;
2130                    current_prop_ns = ns.to_string();
2131                    current_prop_local = local.clone();
2132                    // Check for rdf:resource attribute
2133                    if let Some(res_attr) = attributes.iter().find(|a| {
2134                        a.name.local_name == "resource" && a.name.namespace.as_deref() == Some(rdf_ns)
2135                    }) {
2136                        let nid = current_node_id.as_ref().unwrap().clone();
2137                        let entry = map.entry(nid).or_default();
2138                        entry.push((ns.to_string(), local.clone(), res_attr.value.clone()));
2139                        in_property = false; // self-closing with resource
2140                    }
2141                }
2142            }
2143            Ok(XmlEvent::Characters(text)) | Ok(XmlEvent::CData(text)) => {
2144                if in_property {
2145                    current_text.push_str(&text);
2146                }
2147            }
2148            Ok(XmlEvent::EndElement { name }) => {
2149                let local = &name.local_name;
2150                let ns = name.namespace.as_deref().unwrap_or("");
2151                if local == "Description" && ns == rdf_ns {
2152                    current_node_id = None;
2153                    in_property = false;
2154                } else if in_property {
2155                    // Closing a property child element
2156                    let text = normalize_xml_text(&current_text);
2157                    if let Some(nid) = &current_node_id {
2158                        let entry = map.entry(nid.clone()).or_default();
2159                        entry.push((current_prop_ns.clone(), current_prop_local.clone(), text));
2160                    }
2161                    in_property = false;
2162                    current_text.clear();
2163                }
2164            }
2165            Err(_) => continue,
2166            _ => {}
2167        }
2168    }
2169    map
2170}