1use crate::error::{Error, Result};
6use crate::tag::{Tag, TagGroup, TagId};
7use crate::value::Value;
8
9use xml::reader::{EventReader, XmlEvent};
10
/// Stateless entry point for XMP parsing.
///
/// The type carries no data; parsing is exposed through the associated
/// function `XmpReader::read`, which takes the raw packet bytes.
/// The common traits are derived so the reader can be logged, copied and
/// default-constructed like any other plain value.
#[derive(Debug, Clone, Copy, Default)]
pub struct XmpReader;
13
/// Maps an XMP namespace URI to its conventional short prefix.
///
/// Unknown URIs yield an empty string. The result is always a string
/// literal, so it is returned as `&'static str` and does not keep the `uri`
/// argument borrowed.
fn namespace_prefix(uri: &str) -> &'static str {
    match uri {
        "http://purl.org/dc/elements/1.1/" => "dc",
        "http://ns.adobe.com/xap/1.0/" => "xmp",
        "http://ns.adobe.com/xap/1.0/mm/" => "xmpMM",
        "http://ns.adobe.com/xap/1.0/rights/" => "xmpRights",
        "http://ns.adobe.com/tiff/1.0/" => "tiff",
        "http://ns.adobe.com/exif/1.0/" => "exif",
        "http://ns.adobe.com/exif/1.0/aux/" => "aux",
        "http://ns.adobe.com/photoshop/1.0/" => "photoshop",
        "http://ns.adobe.com/camera-raw-settings/1.0/" => "crs",
        "http://ns.adobe.com/lightroom/1.0/" => "lr",
        "http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/" => "Iptc4xmpCore",
        "http://iptc.org/std/Iptc4xmpExt/2008-02-29/" => "Iptc4xmpExt",
        "http://ns.google.com/photos/1.0/camera/" => "GCamera",
        "http://ns.google.com/photos/1.0/image/" => "GImage",
        "http://ns.google.com/photos/1.0/container/" => "GContainer",
        "http://ns.google.com/photos/1.0/container/item/" => "GContainerItem",
        "http://ns.google.com/photos/dd/1.0/device/" => "GDevice",
        "http://ns.adobe.com/xmp/note/" => "xmpNote",
        "adobe:ns:meta/" => "x",
        "http://ns.adobe.com/pdf/1.3/" => "pdf",
        "http://ns.adobe.com/xap/1.0/bj/" => "xmpBJ",
        "http://ns.adobe.com/xap/1.0/sType/Job#" => "stJob",
        "http://ns.adobe.com/xap/1.0/t/pg/" => "xmpTPg",
        "http://ns.adobe.com/xap/1.0/g/" => "xmpG",
        "http://ns.adobe.com/xap/1.0/g/img/" => "xmpGImg",
        "http://ns.adobe.com/xap/1.0/sType/Dimensions#" => "stDim",
        "http://ns.adobe.com/xap/1.0/sType/ResourceRef#" => "stRef",
        "http://ns.adobe.com/xap/1.0/sType/Font#" => "stFnt",
        "http://ns.adobe.com/xap/1.0/sType/ManifestItem#" => "stMfs",
        "http://www.w3.org/2000/01/rdf-schema#" => "rdfs",
        "http://ns.microsoft.com/photo/1.0/" => "MicrosoftPhoto",
        "http://ns.useplus.org/ldf/xmp/1.0/" => "plus",
        "http://ns.adobe.com/xap/1.0/sType/Area#" => "stArea",
        "http://www.metadataworkinggroup.com/schemas/regions/" => "mwg-rs",
        "http://www.metadataworkinggroup.com/schemas/keywords/" => "mwg-kw",
        // Not a namespace in the table above.
        _ => "",
    }
}
55
/// Returns the category name associated with an XMP namespace prefix.
///
/// Only a handful of prefixes map to a specific category; every other
/// prefix (including `xmp`, `xmpMM`, `xmpRights` and the IPTC ones) falls
/// into `"Other"`. The result is a string literal, so it is returned as
/// `&'static str` and does not keep the `prefix` argument borrowed.
fn namespace_category(prefix: &str) -> &'static str {
    match prefix {
        "dc" => "Author",
        "tiff" | "photoshop" => "Image",
        "exif" | "aux" => "Camera",
        _ => "Other",
    }
}
68
/// Reports whether `local_name` is one of the two attribute spellings
/// recognised as the HDR+ maker note (`HdrPlusMakernote` or `hdrp_makernote`).
fn is_hdrp_makernote_attr(local_name: &str) -> bool {
    matches!(local_name, "HdrPlusMakernote" | "hdrp_makernote")
}
74
75fn emit_hdrp_makernote(b64_value: &str, tags: &mut Vec<Tag>) {
77 use crate::metadata::google_hdrp::decode_hdrp_makernote;
78
79 let raw_bytes = b64_value.trim().len() * 3 / 4; let print = format!("(Binary data {} bytes, use -b option to extract)", raw_bytes);
82 tags.push(Tag {
83 id: TagId::Text("GCamera:HdrPlusMakernote".into()),
84 name: "HDRPlusMakerNote".into(),
85 description: "HDRPlusMakerNote".into(),
86 group: TagGroup {
87 family0: "XMP".into(),
88 family1: "XMP-GCamera".into(),
89 family2: "Other".into(),
90 },
91 raw_value: Value::String(b64_value.to_string()),
92 print_value: print,
93 priority: 0,
94 });
95
96 let hdrp_tags = decode_hdrp_makernote(b64_value);
98 tags.extend(hdrp_tags);
99}
100
101impl XmpReader {
102 pub fn read(data: &[u8]) -> Result<Vec<Tag>> {
104 let mut tags = Vec::new();
105
106 let converted: Option<String> = if data.starts_with(&[0xFE, 0xFF]) {
109 let units: Vec<u16> = data[2..].chunks_exact(2)
110 .map(|c| u16::from_be_bytes([c[0], c[1]])).collect();
111 Some(String::from_utf16_lossy(&units))
112 } else if data.starts_with(&[0xFF, 0xFE]) && !data.starts_with(&[0xFF, 0xFE, 0x00, 0x00]) {
113 let units: Vec<u16> = data[2..].chunks_exact(2)
114 .map(|c| u16::from_le_bytes([c[0], c[1]])).collect();
115 Some(String::from_utf16_lossy(&units))
116 } else if data.len() > 4 && data[0] == 0 && data[1] != 0 {
117 let units: Vec<u16> = data.chunks_exact(2)
119 .map(|c| u16::from_be_bytes([c[0], c[1]])).collect();
120 Some(String::from_utf16_lossy(&units))
121 } else {
122 None
123 };
124 let xml_data: &str = if let Some(ref s) = converted {
125 s.as_str()
126 } else if data.starts_with(&[0xEF, 0xBB, 0xBF]) {
127 std::str::from_utf8(&data[3..])
129 .map_err(|e| Error::InvalidXmp(format!("invalid UTF-8: {}", e)))?
130 } else {
131 std::str::from_utf8(data)
133 .or_else(|_| {
134 let trimmed = &data[..data.iter().rposition(|&b| b == b'>').unwrap_or(0) + 1];
135 std::str::from_utf8(trimmed)
136 })
137 .map_err(|e| Error::InvalidXmp(format!("invalid UTF-8: {}", e)))?
138 };
139
140 let xml_sanitized: String = sanitize_xmp_xml(xml_data);
143 let xml_clean: String = fix_malformed_xml(&xml_sanitized);
144 let xml_for_parse: &str = &xml_clean;
145
146 let is_inx = {
149 let trimmed = xml_for_parse.trim_start();
150 trimmed.starts_with("<?xml") && {
151 trimmed.lines().take(5).any(|l| l.trim_start().starts_with("<?aid "))
153 }
154 };
155 if is_inx {
156 if let Some(cdata_start) = xml_for_parse.find("<![CDATA[<?xpacket begin") {
158 let xmp_start = cdata_start + 9; if let Some(end_marker) = xml_for_parse[xmp_start..].find("<?xpacket end=") {
161 let after_end = xmp_start + end_marker;
162 if let Some(close) = xml_for_parse[after_end..].find("?>") {
163 let xmp_end = after_end + close + 2; let xmp_data = &xml_for_parse[xmp_start..xmp_end];
165 let xmp_bytes = xmp_data.as_bytes().to_vec();
167 return XmpReader::read(&xmp_bytes);
168 }
169 }
170 }
171 return Ok(tags);
172 }
173
174 let is_rdf = xml_for_parse.contains("rdf:RDF") || xml_for_parse.contains("rdf:Description");
176 if !is_rdf {
177 return read_generic_xml(xml_for_parse);
179 }
180
181 let node_bags: std::collections::HashMap<String, Vec<String>> =
183 collect_node_bag_values(xml_for_parse);
184
185 let blank_node_props: std::collections::HashMap<String, Vec<(String, String, String)>> =
188 collect_blank_node_properties(xml_for_parse);
189
190 let inline_referenced_node_ids: std::collections::HashSet<String> =
194 collect_inline_referenced_node_ids(xml_for_parse);
195
196 let parser = EventReader::from_str(xml_for_parse);
197 let mut path: Vec<(String, String)> = Vec::new(); let mut current_text = String::new();
199 let mut in_rdf_li = false;
200 let mut list_values: Vec<String> = Vec::new();
201 let mut emit_empty_depths: std::collections::HashSet<usize> = std::collections::HashSet::new();
203 let mut parse_resource_depths: Vec<usize> = Vec::new();
206
207 let mut inline_blank_node_stack: Vec<(String, String)> = Vec::new();
211 let mut suppress_direct_emit_depth: Option<usize> = None;
214
215 let mut gcontainer_fields: std::collections::HashMap<String, Vec<String>> =
218 std::collections::HashMap::new();
219 let mut in_gcontainer_seq = false;
221 let mut in_gcontainer_li = false;
223
224 let mut current_li_lang: Option<String> = None;
231 let mut in_lang_alt = false;
232 let mut lang_alt_in_bag = false;
233 let mut bag_lang_values: std::collections::HashMap<String, Vec<Option<String>>> =
234 std::collections::HashMap::new();
235 let mut bag_item_count: usize = 0;
236
237 for event in parser {
238 match event {
239 Ok(XmlEvent::StartElement {
240 name, attributes, ..
241 }) => {
242 let ns_uri = name.namespace.as_deref().unwrap_or("");
244 path.push((ns_uri.to_string(), name.local_name.clone()));
245 current_text.clear();
246
247 let has_et_id = attributes.iter().any(|a| {
249 a.name.local_name == "id"
250 && (a.name.prefix.as_deref() == Some("et")
251 || a.name.namespace.as_deref() == Some("http://ns.exiftool.org/1.0/"))
252 });
253 if has_et_id {
254 emit_empty_depths.insert(path.len());
255 }
256
257 let has_parse_resource = attributes.iter().any(|a| {
259 a.name.local_name == "parseType"
260 && (a.name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
261 || a.name.prefix.as_deref() == Some("rdf"))
262 && a.value == "Resource"
263 });
264 if has_parse_resource {
265 parse_resource_depths.push(path.len());
266 }
267
268 if name.local_name == "xmpmeta" || name.local_name == "xapmeta" {
270 for attr in &attributes {
271 if attr.name.local_name == "xmptk" || attr.name.local_name == "xaptk" {
272 tags.push(Tag {
273 id: TagId::Text("x:xmptk".into()),
274 name: "XMPToolkit".into(),
275 description: "XMP Toolkit".into(),
276 group: TagGroup { family0: "XMP".into(), family1: "XMP-x".into(), family2: "Other".into() },
277 raw_value: Value::String(attr.value.clone()),
278 print_value: attr.value.clone(),
279 priority: 0,
280 });
281 }
282 }
283 }
284
285 if name.local_name != "Description"
290 && name.local_name != "RDF"
291 && name.namespace.as_deref() != Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
292 {
293 if let Some(node_ref) = attributes.iter().find(|a| {
294 a.name.local_name == "nodeID"
295 && (a.name.prefix.as_deref() == Some("rdf")
296 || a.name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#"))
297 }) {
298 let node_id = &node_ref.value;
299 if let Some(bag_values) = node_bags.get(node_id) {
300 let ns_uri = name.namespace.as_deref().unwrap_or("");
301 let prefix = namespace_prefix(ns_uri);
302 let group_prefix = if prefix.is_empty() {
303 name.prefix.as_deref().unwrap_or("XMP")
304 } else {
305 prefix
306 };
307 let category = namespace_category(group_prefix);
308 let full_name = ucfirst(&name.local_name);
309 let value = if bag_values.len() == 1 {
310 Value::String(bag_values[0].clone())
311 } else {
312 Value::List(bag_values.iter().map(|s| Value::String(s.clone())).collect())
313 };
314 let pv = value.to_display_string();
315 tags.push(Tag {
316 id: TagId::Text(format!("{}:{}", group_prefix, name.local_name)),
317 name: full_name.clone(),
318 description: full_name,
319 group: TagGroup {
320 family0: "XMP".into(),
321 family1: format!("XMP-{}", group_prefix),
322 family2: category.into(),
323 },
324 raw_value: value,
325 print_value: pv,
326 priority: 0,
327 });
328 }
329 if let Some(bn_props) = blank_node_props.get(node_id) {
331 let elem_ns = name.namespace.as_deref().unwrap_or("");
332 let elem_prefix_ns = namespace_prefix(elem_ns);
333 let elem_group = if elem_prefix_ns.is_empty() {
334 name.prefix.as_deref().unwrap_or("XMP")
335 } else { elem_prefix_ns };
336 let parent_uc = ucfirst(&strip_non_ascii(&name.local_name));
337 let anc_prefix = build_struct_tag_prefix_without_last(&path, &name.local_name);
339 let elem_flat = if anc_prefix.is_empty() {
340 parent_uc.clone()
341 } else {
342 let stripped = strip_struct_prefix(&anc_prefix, &parent_uc);
343 format!("{}{}", anc_prefix, stripped)
344 };
345 for (prop_ns, prop_local, prop_val) in bn_props {
346 let prop_prefix = namespace_prefix(prop_ns);
347 let prop_group = if prop_prefix.is_empty() { elem_group } else { prop_prefix };
348 let prop_cat = namespace_category(prop_group);
349 let prop_uc = ucfirst(&strip_non_ascii(prop_local));
350 let stripped = strip_struct_prefix(&elem_flat, &prop_uc);
351 let flat_raw = format!("{}{}", elem_flat, stripped);
352 let flat = apply_flat_name_remap(&flat_raw).to_string();
353 tags.push(Tag {
354 id: TagId::Text(format!("{}:{}", prop_group, flat)),
355 name: flat.clone(),
356 description: flat,
357 group: TagGroup {
358 family0: "XMP".into(),
359 family1: format!("XMP-{}", prop_group),
360 family2: prop_cat.into(),
361 },
362 raw_value: Value::String(prop_val.clone()),
363 print_value: prop_val.clone(),
364 priority: 0,
365 });
366 }
367 }
368 }
369 }
370
371 let in_suppressed_bn = suppress_direct_emit_depth
376 .map(|d| path.len() > d)
377 .unwrap_or(false);
378 if name.local_name != "Description"
379 && name.local_name != "RDF"
380 && !in_suppressed_bn
381 && name.namespace.as_deref() != Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
382 {
383 if let Some(res_attr) = attributes.iter().find(|a| {
384 a.name.local_name == "resource"
385 && (a.name.prefix.as_deref() == Some("rdf")
386 || a.name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#"))
387 }) {
388 let resource_val = res_attr.value.clone();
389 let ns_uri = name.namespace.as_deref().unwrap_or("");
390 let prefix = namespace_prefix(ns_uri);
391 let group_prefix = if prefix.is_empty() {
392 name.prefix.as_deref().unwrap_or("XMP")
393 } else {
394 prefix
395 };
396 let category = namespace_category(group_prefix);
397 let remapped = remap_xmp_tag_name(group_prefix, &name.local_name);
399 let full_name = if !parse_resource_depths.is_empty() {
400 let ancestor_prefix = build_struct_tag_prefix_without_last(&path, &name.local_name);
401 if !ancestor_prefix.is_empty() {
402 let field_stripped = strip_struct_prefix(&ancestor_prefix, &remapped);
403 let candidate = format!("{}{}", ancestor_prefix, field_stripped);
404 apply_flat_name_remap(&candidate).to_string()
405 } else {
406 apply_flat_name_remap(&remapped).to_string()
407 }
408 } else {
409 apply_flat_name_remap(&remapped).to_string()
410 };
411 tags.push(Tag {
412 id: TagId::Text(format!("{}:{}", group_prefix, name.local_name)),
413 name: full_name.clone(),
414 description: full_name,
415 group: TagGroup {
416 family0: "XMP".into(),
417 family1: format!("XMP-{}", group_prefix),
418 family2: category.into(),
419 },
420 raw_value: Value::String(resource_val.clone()),
421 print_value: resource_val,
422 priority: 0,
423 });
424 }
425 }
426
427 let rdf_ns_check = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
430 let desc_node_id = attributes.iter().find(|a| {
431 a.name.local_name == "nodeID"
432 && (a.name.prefix.as_deref() == Some("rdf") || a.name.namespace.as_deref() == Some(rdf_ns_check))
433 }).map(|a| a.value.clone());
434 let is_top_level_blank_node_desc = name.local_name == "Description"
435 && name.namespace.as_deref() == Some(rdf_ns_check)
436 && desc_node_id.as_ref().map(|nid| inline_referenced_node_ids.contains(nid.as_str())).unwrap_or(false)
437 && path.iter().rev().nth(1)
438 .map(|(ns, ln)| ns == rdf_ns_check || ln == "RDF" || ln == "xmpmeta" || ln == "xapmeta")
439 .unwrap_or(true); if name.local_name == "Description" && !is_top_level_blank_node_desc {
445 for attr in &attributes {
446 if attr.name.local_name == "about" {
448 if !attr.value.is_empty() {
449 tags.push(Tag {
450 id: TagId::Text("rdf:about".into()),
451 name: "About".into(), description: "About".into(),
452 group: TagGroup { family0: "XMP".into(), family1: "XMP-rdf".into(), family2: "Other".into() },
453 raw_value: Value::String(attr.value.clone()),
454 print_value: attr.value.clone(), priority: 0,
455 });
456 }
457 continue;
458 }
459 if attr.name.local_name == "nodeID"
461 && (attr.name.prefix.as_deref() == Some("rdf")
462 || attr.name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#"))
463 {
464 continue;
465 }
466 if attr.name.prefix.as_deref() == Some("xmlns") { continue; }
467 if attr.name.local_name.starts_with("xmlns") { continue; }
468 if attr.name.prefix.as_deref() == Some("et")
470 || attr.name.namespace.as_deref() == Some("http://ns.exiftool.org/1.0/")
471 || attr.name.namespace.as_deref() == Some("http://ns.exiftool.ca/1.0/")
472 { continue; }
473
474 let attr_ns = attr.name.namespace.as_deref().unwrap_or("");
475 let attr_prefix = namespace_prefix(attr_ns);
476 let group_prefix = if attr_prefix.is_empty() {
477 attr.name.prefix.as_deref().unwrap_or("XMP")
478 } else {
479 attr_prefix
480 };
481
482 {
483 if !attr.value.is_empty() && group_prefix == "GCamera" && is_hdrp_makernote_attr(&attr.name.local_name) {
486 emit_hdrp_makernote(&attr.value, &mut tags);
487 continue;
488 }
489
490 let category = namespace_category(group_prefix);
491 let remapped = remap_xmp_tag_name(group_prefix, &attr.name.local_name);
492 let rdf_ns2 = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
496 let full_name = if let Some(parent_elem) = path.iter().rev().nth(1) {
497 if parent_elem.0 != rdf_ns2
498 && parent_elem.1 != "RDF"
499 && parent_elem.1 != "xmpmeta"
500 && parent_elem.1 != "xapmeta"
501 {
502 let parent_uc = ucfirst(&strip_non_ascii(&parent_elem.1));
503 let field_uc = ucfirst(&strip_non_ascii(&attr.name.local_name));
504 let stripped = strip_struct_prefix(&parent_uc, &field_uc);
505 apply_flat_name_remap(&format!("{}{}", parent_uc, stripped)).to_string()
506 } else {
507 remapped
508 }
509 } else {
510 remapped
511 };
512 tags.push(Tag {
513 id: TagId::Text(format!("{}:{}", group_prefix, attr.name.local_name)),
514 name: full_name.clone(),
515 description: full_name,
516 group: TagGroup {
517 family0: "XMP".to_string(),
518 family1: format!("XMP-{}", group_prefix),
519 family2: category.to_string(),
520 },
521 raw_value: parse_xmp_value(&attr.value),
522 print_value: attr.value.clone(),
523 priority: 0,
524 });
525 }
526 }
527 }
528
529 let rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
534 if name.local_name == "Description" && name.namespace.as_deref() == Some(rdf_ns) {
535 if let Some(nid_attr) = attributes.iter().find(|a| {
536 a.name.local_name == "nodeID"
537 && (a.name.prefix.as_deref() == Some("rdf") || a.name.namespace.as_deref() == Some(rdf_ns))
538 }) {
539 let nid = nid_attr.value.clone();
540 let parent_is_property = path.iter().rev().nth(1).map(|(pns, pln)| {
542 pns != rdf_ns && pln != "RDF" && pln != "xmpmeta" && pln != "xapmeta"
543 }).unwrap_or(false);
544
545 if parent_is_property {
546 if let Some(parent) = path.iter().rev().nth(1) {
548 inline_blank_node_stack.push((nid, parent.1.clone()));
549 }
550 } else if inline_referenced_node_ids.contains(nid.as_str()) {
551 suppress_direct_emit_depth = Some(path.len());
554 } else {
555 if let Some(bn_props) = blank_node_props.get(nid.as_str()) {
558 for (prop_ns, prop_local, prop_val) in bn_props.clone() {
559 let prop_prefix = namespace_prefix(&prop_ns);
560 let prop_prefix = if prop_prefix.is_empty() { "XMP" } else { prop_prefix };
561 let category = namespace_category(prop_prefix);
562 let remapped = remap_xmp_tag_name(prop_prefix, &prop_local);
563 tags.push(Tag {
564 id: TagId::Text(format!("{}:{}", prop_prefix, prop_local)),
565 name: remapped.clone(),
566 description: remapped,
567 group: TagGroup {
568 family0: "XMP".to_string(),
569 family1: format!("XMP-{}", prop_prefix),
570 family2: category.to_string(),
571 },
572 raw_value: parse_xmp_value(&prop_val),
573 print_value: prop_val.clone(),
574 priority: 0,
575 });
576 }
577 }
578 suppress_direct_emit_depth = Some(path.len());
580 }
581 }
582 }
583
584 let is_rdf_structural = name.namespace.as_deref() == Some(rdf_ns)
590 || name.local_name == "Description"
591 || name.local_name == "RDF"
592 || name.local_name == "li"
593 || name.local_name == "Seq"
594 || name.local_name == "Bag"
595 || name.local_name == "Alt"
596 || name.local_name == "xmpmeta"
597 || name.local_name == "xapmeta"
598 || name.namespace.as_deref() == Some("adobe:ns:meta/");
599 if !is_rdf_structural && !attributes.is_empty() {
600 let elem_ns = name.namespace.as_deref().unwrap_or("");
601 let elem_prefix = namespace_prefix(elem_ns);
602 let elem_group = if elem_prefix.is_empty() {
603 name.prefix.as_deref().unwrap_or("XMP")
604 } else {
605 elem_prefix
606 };
607 let ancestors_prefix = build_struct_tag_prefix_without_last(&path, &name.local_name);
611 let elem_uc = ucfirst(&strip_non_ascii(&name.local_name));
612 let elem_flat = if ancestors_prefix.is_empty() {
614 elem_uc.clone()
615 } else {
616 let stripped = strip_struct_prefix(&ancestors_prefix, &elem_uc);
617 format!("{}{}", ancestors_prefix, stripped)
618 };
619
620 for attr in &attributes {
621 if attr.name.prefix.as_deref() == Some("xmlns") { continue; }
623 if attr.name.local_name.starts_with("xmlns") { continue; }
624 if attr.name.prefix.as_deref() == Some("rdf")
625 || attr.name.namespace.as_deref() == Some(rdf_ns) { continue; }
626 if attr.name.prefix.as_deref() == Some("et")
627 || attr.name.namespace.as_deref() == Some("http://ns.exiftool.org/1.0/")
628 || attr.name.namespace.as_deref() == Some("http://ns.exiftool.ca/1.0/") { continue; }
629 if attr.name.prefix.as_deref() == Some("xml")
630 || attr.name.namespace.as_deref() == Some("http://www.w3.org/XML/1998/namespace") { continue; }
631
632 let attr_ns = attr.name.namespace.as_deref().unwrap_or("");
633 let attr_prefix_resolved = namespace_prefix(attr_ns);
634 let attr_group = if attr_prefix_resolved.is_empty() {
635 attr.name.prefix.as_deref().unwrap_or(elem_group)
636 } else {
637 attr_prefix_resolved
638 };
639 let field_uc = ucfirst(&strip_non_ascii(&attr.name.local_name));
640 let field_stripped = strip_struct_prefix(&elem_flat, &field_uc);
642 let flat_name_raw = format!("{}{}", elem_flat, field_stripped);
643 let flat_name = apply_flat_name_remap(&flat_name_raw).to_string();
644 let category = namespace_category(attr_group);
645 let pv = attr.value.clone();
646 tags.push(Tag {
647 id: TagId::Text(format!("{}:{}", attr_group, flat_name)),
648 name: flat_name.clone(),
649 description: flat_name,
650 group: TagGroup {
651 family0: "XMP".into(),
652 family1: format!("XMP-{}", elem_group),
653 family2: category.into(),
654 },
655 raw_value: parse_xmp_value(&attr.value),
656 print_value: pv,
657 priority: 0,
658 });
659 }
660 }
661
662 if name.local_name == "Seq"
665 && name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
666 {
667 if let Some(parent) = path.iter().rev().nth(1) {
669 if parent.1 == "Directory"
670 && parent.0 == "http://ns.google.com/photos/1.0/container/"
671 {
672 in_gcontainer_seq = true;
673 }
674 }
675 }
676
677 if in_gcontainer_seq
679 && name.local_name == "li"
680 && name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
681 {
682 in_gcontainer_li = true;
683 in_rdf_li = true;
684 } else if name.local_name == "li"
685 && name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
686 {
687 if in_lang_alt {
688 current_li_lang = attributes.iter()
690 .find(|a| a.name.local_name == "lang"
691 && (a.name.prefix.as_deref() == Some("xml")
692 || a.name.namespace.as_deref() == Some("http://www.w3.org/XML/1998/namespace")))
693 .map(|a| a.value.clone());
694 }
695 in_rdf_li = true;
697 }
698
699 if name.local_name == "Alt"
701 && name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
702 {
703 in_lang_alt = true;
704 let depth = path.len();
707 if depth >= 3 {
708 let li_elem = &path[depth - 2]; let bag_elem = &path[depth - 3]; if li_elem.1 == "li" && li_elem.0 == "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
711 && bag_elem.1 == "Bag" && bag_elem.0 == "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
712 {
713 lang_alt_in_bag = true;
714 }
715 }
716 }
717
718 if in_gcontainer_li
721 && name.local_name == "Item"
722 && name.namespace.as_deref() == Some("http://ns.google.com/photos/1.0/container/")
723 {
724 let mut found: std::collections::HashMap<String, String> =
726 std::collections::HashMap::new();
727 for attr in &attributes {
728 if attr.name.namespace.as_deref() == Some("http://ns.google.com/photos/1.0/container/item/") {
729 let field = ucfirst(&attr.name.local_name);
730 found.insert(field, attr.value.clone());
731 }
732 }
733 let known = ["Mime", "Semantic", "Length"];
736 for k in &known {
737 if let Some(v) = found.get(*k) {
738 gcontainer_fields.entry(k.to_string())
739 .or_default()
740 .push(v.clone());
741 }
742 }
743 }
744 }
745 Ok(XmlEvent::Characters(text)) | Ok(XmlEvent::CData(text)) => {
746 current_text.push_str(&text);
747 }
748 Ok(XmlEvent::EndElement { name }) => {
749 let ns_uri = name.namespace.as_deref().unwrap_or("");
750
751 if name.local_name == "li"
753 && name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
754 {
755 if in_lang_alt {
756 let lang = current_li_lang.take().unwrap_or_else(|| "x-default".to_string());
758 let text = normalize_xml_text(¤t_text);
759 let opt_text: Option<String> = Some(text.clone());
761 if lang_alt_in_bag {
762 let entry = bag_lang_values.entry(lang.clone()).or_default();
764 while entry.len() < bag_item_count {
767 entry.push(None);
768 }
769 entry.push(opt_text);
770 } else {
771 if lang == "x-default" {
773 list_values.push(text);
774 } else {
775 bag_lang_values.entry(lang).or_default().push(opt_text);
777 }
778 }
779 } else if lang_alt_in_bag && !in_lang_alt {
780 bag_item_count += 1;
784 } else if in_gcontainer_li {
786 in_gcontainer_li = false;
788 } else if !normalize_xml_text(¤t_text).is_empty() {
789 list_values.push(normalize_xml_text(¤t_text));
790 }
791 in_rdf_li = false;
792 path.pop();
793 current_text.clear();
794 continue;
795 }
796
797 if (name.local_name == "Seq"
799 || name.local_name == "Bag"
800 || name.local_name == "Alt")
801 && name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
802 {
803 if name.local_name == "Alt" {
805 in_lang_alt = false;
806 if lang_alt_in_bag {
808 path.pop();
810 current_text.clear();
811 continue;
812 }
813 if let Some(parent) = path.iter().rev().nth(1) {
816 let prefix = namespace_prefix(&parent.0);
817 let tag_name = parent.1.clone();
818 let group_prefix = if prefix.is_empty() { "XMP" } else { prefix };
819 let category = namespace_category(group_prefix);
820
821 let rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
825 let in_struct_li_alt = !parse_resource_depths.is_empty()
826 || path.iter().rev().nth(2)
827 .map(|(ns, ln)| ln == "li" && ns == rdf_ns)
828 .unwrap_or(false);
829 let (full_tag_name, emit_group_prefix, emit_category) = if in_struct_li_alt {
830 let ancestor_prefix = build_struct_tag_prefix_without_last(&path, &tag_name);
834 let field_uc = ucfirst(&strip_non_ascii(&tag_name));
835 if !ancestor_prefix.is_empty() {
836 let stripped = strip_struct_prefix(&ancestor_prefix, &field_uc);
837 let flat_raw = format!("{}{}", ancestor_prefix, stripped);
838 let flat = apply_flat_name_remap(&flat_raw).to_string();
839 let sp_gp = path.iter().rev()
841 .skip(2)
842 .skip_while(|(ns, ln)| ln == "li" || ln == "Bag" || ln == "Seq" || ln == "Alt" || ns == rdf_ns)
843 .find(|(ns, ln)| ln != "Description" && ns != rdf_ns)
844 .map(|(sp_ns, _)| {
845 let p = namespace_prefix(sp_ns);
846 if p.is_empty() { group_prefix } else { p }
847 })
848 .unwrap_or(group_prefix);
849 let cat = namespace_category(sp_gp);
850 (flat, sp_gp.to_string(), cat.to_string())
851 } else {
852 (ucfirst(&strip_non_ascii(&tag_name)), group_prefix.to_string(), category.to_string())
853 }
854 } else {
855 let tn = apply_flat_name_remap(&ucfirst(&strip_non_ascii(&tag_name))).to_string();
856 (tn, group_prefix.to_string(), category.to_string())
857 };
858
859 let has_nonempty = list_values.iter().any(|s| !s.is_empty());
862 if !list_values.is_empty() && has_nonempty {
863 let main_val = if list_values.len() == 1 {
864 Value::String(list_values[0].clone())
865 } else {
866 Value::List(list_values.iter().map(|s| Value::String(s.clone())).collect())
867 };
868 let pv = main_val.to_display_string();
869 tags.push(Tag {
870 id: TagId::Text(format!("{}:{}", emit_group_prefix, tag_name)),
871 name: full_tag_name.clone(),
872 description: full_tag_name.clone(),
873 group: TagGroup {
874 family0: "XMP".into(),
875 family1: format!("XMP-{}", emit_group_prefix),
876 family2: emit_category.clone(),
877 },
878 raw_value: main_val,
879 print_value: pv,
880 priority: 0,
881 });
882 }
883 list_values.clear();
884 let mut lang_keys: Vec<String> = bag_lang_values.keys().cloned().collect();
886 lang_keys.sort();
887 for lang in &lang_keys {
888 let vals = &bag_lang_values[lang];
889 let non_none: Vec<String> = vals.iter()
890 .filter_map(|v| v.clone())
891 .collect();
892 if !non_none.is_empty() {
893 let lang_tag = format!("{}-{}", full_tag_name, lang);
894 let val = if non_none.len() == 1 {
895 Value::String(non_none[0].clone())
896 } else {
897 Value::List(non_none.iter().map(|s| Value::String(s.clone())).collect())
898 };
899 let pv = val.to_display_string();
900 tags.push(Tag {
901 id: TagId::Text(format!("{}-{}:{}", emit_group_prefix, lang, tag_name)),
902 name: lang_tag.clone(),
903 description: lang_tag.clone(),
904 group: TagGroup {
905 family0: "XMP".into(),
906 family1: format!("XMP-{}", emit_group_prefix),
907 family2: emit_category.clone(),
908 },
909 raw_value: val,
910 print_value: pv,
911 priority: 0,
912 });
913 }
914 }
915 bag_lang_values.clear();
916 }
917 path.pop();
918 current_text.clear();
919 continue;
920 }
921
922 if name.local_name == "Bag" && lang_alt_in_bag {
924 lang_alt_in_bag = false;
925 bag_item_count = 0;
926 if let Some(parent) = path.iter().rev().nth(1) {
928 let prefix = namespace_prefix(&parent.0);
929 let tag_name = parent.1.clone();
930 let group_prefix = if prefix.is_empty() { "XMP" } else { prefix };
931 let category = namespace_category(group_prefix);
932
933 let ancestor_prefix = build_struct_tag_prefix_without_last(&path, &tag_name);
935 let field_uc = ucfirst(&strip_non_ascii(&tag_name));
936 let full_flat_base = if !ancestor_prefix.is_empty() {
937 let stripped = strip_struct_prefix(&ancestor_prefix, &field_uc);
938 let raw = format!("{}{}", ancestor_prefix, stripped);
939 apply_flat_name_remap(&raw).to_string()
940 } else {
941 apply_flat_name_remap(&field_uc).to_string()
942 };
943
944 let mut lang_keys: Vec<String> = bag_lang_values.keys().cloned().collect();
946 lang_keys.sort_by(|a, b| {
948 if a == "x-default" { std::cmp::Ordering::Less }
949 else if b == "x-default" { std::cmp::Ordering::Greater }
950 else { a.cmp(b) }
951 });
952
953 for lang in &lang_keys {
954 let vals = &bag_lang_values[lang];
955 let is_default = lang == "x-default"; let joined: String = vals.iter()
964 .filter_map(|v| v.as_deref()) .collect::<Vec<_>>()
966 .join(", ");
967
968 let has_content = vals.iter().any(|v| v.is_some());
970 if !has_content {
971 continue;
972 }
973
974 let (tag_key, tag_display) = if is_default {
975 (full_flat_base.clone(), full_flat_base.clone())
976 } else {
977 let lt = format!("{}-{}", full_flat_base, lang);
978 (lt.clone(), lt)
979 };
980
981 tags.push(Tag {
982 id: TagId::Text(format!("{}:{}", group_prefix, tag_key)),
983 name: tag_key.clone(),
984 description: tag_display,
985 group: TagGroup {
986 family0: "XMP".into(),
987 family1: format!("XMP-{}", group_prefix),
988 family2: category.into(),
989 },
990 raw_value: Value::String(joined.clone()),
991 print_value: joined,
992 priority: 0,
993 });
994 }
995 bag_lang_values.clear();
996 }
997 path.pop();
998 current_text.clear();
999 continue;
1000 }
1001
1002 if in_gcontainer_seq && name.local_name == "Seq" {
1004 in_gcontainer_seq = false;
1005 for (field, values) in &gcontainer_fields {
1007 let tag_name = format!("DirectoryItem{}", field);
1008 let value = if values.len() == 1 {
1009 Value::String(values[0].clone())
1010 } else {
1011 Value::List(values.iter().map(|s| Value::String(s.clone())).collect())
1012 };
1013 let print_value = value.to_display_string();
1014 tags.push(Tag {
1015 id: TagId::Text(format!("GContainer:{}", tag_name)),
1016 name: tag_name.clone(),
1017 description: tag_name.clone(),
1018 group: TagGroup {
1019 family0: "XMP".into(),
1020 family1: "XMP-GContainer".into(),
1021 family2: "Image".into(),
1022 },
1023 raw_value: value,
1024 print_value,
1025 priority: 0,
1026 });
1027 }
1028 gcontainer_fields.clear();
1029 } else if !list_values.is_empty() {
1030 if let Some(parent) = path.iter().rev().nth(1) {
1032 let prefix = namespace_prefix(&parent.0);
1033 let tag_name = &parent.1;
1034 if tag_name == "RDF" || tag_name == "xmpmeta" || tag_name == "xapmeta"
1036 || parent.0 == "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
1037 || parent.0 == "adobe:ns:meta/"
1038 {
1039 list_values.clear();
1040 path.pop();
1041 current_text.clear();
1042 continue;
1043 }
1044 let group_prefix =
1045 if prefix.is_empty() { "XMP" } else { prefix };
1046 let _category = namespace_category(group_prefix);
1047
1048 let value = if list_values.len() == 1 {
1049 Value::String(list_values[0].clone())
1050 } else {
1051 Value::List(
1052 list_values
1053 .iter()
1054 .map(|s| Value::String(s.clone()))
1055 .collect(),
1056 )
1057 };
1058
1059 let ancestor_prefix = build_struct_tag_prefix_without_last(&path, tag_name);
1061 let field_uc = ucfirst(&strip_non_ascii(tag_name));
1062 let (full_name, emit_group_prefix) = if !ancestor_prefix.is_empty() {
1063 let field_stripped = strip_struct_prefix(&ancestor_prefix, &field_uc);
1064 let raw = format!("{}{}", ancestor_prefix, field_stripped);
1065 let flat = apply_flat_name_remap(&raw).to_string();
1066 let sp_gp = path.iter().rev()
1068 .skip(1) .skip(1) .skip_while(|(ns, ln)| ln == "li" || ln == "Bag" || ln == "Seq" || ln == "Alt" || ns == "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
1071 .find(|(ns, ln)| ln != "Description" && ns != "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
1072 .map(|(sp_ns, _)| {
1073 let p = namespace_prefix(sp_ns);
1074 if p.is_empty() { group_prefix } else { p }
1075 })
1076 .unwrap_or(group_prefix);
1077 (flat, sp_gp.to_string())
1078 } else {
1079 let flat = apply_flat_name_remap(&field_uc).to_string();
1080 (flat, group_prefix.to_string())
1081 };
1082
1083 let emit_cat = namespace_category(&emit_group_prefix);
1084 let print_value = value.to_display_string();
1085
1086 tags.push(Tag {
1087 id: TagId::Text(format!("{}:{}", emit_group_prefix, tag_name)),
1088 name: full_name.clone(),
1089 description: full_name,
1090 group: TagGroup {
1091 family0: "XMP".to_string(),
1092 family1: format!("XMP-{}", emit_group_prefix),
1093 family2: emit_cat.to_string(),
1094 },
1095 raw_value: value,
1096 print_value,
1097 priority: 0,
1098 });
1099 }
1100 list_values.clear();
1101 }
1102 path.pop();
1103 current_text.clear();
1104 continue;
1105 }
1106
1107 if !normalize_xml_text(¤t_text).is_empty() && in_rdf_li
1110 && name.namespace.as_deref() != Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
1111 && name.local_name != "Description"
1112 {
1113 let ancestor_prefix = build_struct_tag_prefix_without_last(&path, &name.local_name);
1116 if !ancestor_prefix.is_empty() {
1117 let field_local = ucfirst(&strip_non_ascii(&name.local_name));
1118 let field_stripped = strip_struct_prefix(&ancestor_prefix, &field_local);
1119 let flat_name_raw = format!("{}{}", ancestor_prefix, field_stripped);
1120 let flat_name = apply_flat_name_remap(&flat_name_raw).to_string();
1121 let prefix = namespace_prefix(ns_uri);
1122 let group_prefix = if prefix.is_empty() { "XMP" } else { prefix };
1123 let category = namespace_category(group_prefix);
1124 tags.push(Tag {
1125 id: TagId::Text(format!("{}:{}", group_prefix, flat_name)),
1126 name: flat_name.clone(), description: flat_name,
1127 group: TagGroup { family0: "XMP".into(), family1: format!("XMP-{}", group_prefix), family2: category.into() },
1128 raw_value: parse_xmp_value(&normalize_xml_text(¤t_text)),
1129 print_value: normalize_xml_text(¤t_text), priority: 0,
1130 });
1131 }
1132 path.pop();
1133 current_text.clear();
1134 continue;
1135 }
1136
1137 let has_et_depth = emit_empty_depths.contains(&path.len());
1140 let in_suppressed_blank_node = suppress_direct_emit_depth
1141 .map(|d| path.len() > d)
1142 .unwrap_or(false);
1143 if (!normalize_xml_text(¤t_text).is_empty() || has_et_depth) && !in_rdf_li && !in_suppressed_blank_node {
1144 let prefix = namespace_prefix(ns_uri);
1145 let tag_name = &name.local_name;
1146
1147 if tag_name != "Description"
1149 && name.namespace.as_deref()
1150 != Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
1151 {
1152 let group_prefix = if prefix.is_empty() { "XMP" } else { prefix };
1153 let category = namespace_category(group_prefix);
1154
1155 let text_val = normalize_xml_text(¤t_text);
1156 let value = parse_xmp_value(&text_val);
1157 let print_value = value.to_display_string();
1158
1159 let remapped = remap_xmp_tag_name(group_prefix, tag_name);
1163 let ancestor_prefix = build_struct_tag_prefix_without_last(&path, tag_name);
1164 let full_name = if !ancestor_prefix.is_empty() {
1165 let field_stripped = strip_struct_prefix(&ancestor_prefix, &remapped);
1166 let candidate = format!("{}{}", ancestor_prefix, field_stripped);
1167 apply_flat_name_remap(&candidate).to_string()
1168 } else {
1169 apply_flat_name_remap(&remapped).to_string()
1170 };
1171
1172 tags.push(Tag {
1173 id: TagId::Text(format!("{}:{}", group_prefix, tag_name)),
1174 name: full_name.clone(),
1175 description: full_name,
1176 group: TagGroup {
1177 family0: "XMP".to_string(),
1178 family1: format!("XMP-{}", group_prefix),
1179 family2: category.to_string(),
1180 },
1181 raw_value: value,
1182 print_value,
1183 priority: 0,
1184 });
1185 }
1186 }
1187
1188 if parse_resource_depths.last() == Some(&path.len()) {
1190 parse_resource_depths.pop();
1191 }
1192 emit_empty_depths.remove(&path.len());
1193
1194 if name.local_name == "Description"
1198 && name.namespace.as_deref() == Some("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
1199 {
1200 if suppress_direct_emit_depth == Some(path.len()) {
1202 suppress_direct_emit_depth = None;
1203 }
1204
1205 if let Some((node_id, parent_local)) = inline_blank_node_stack.last().cloned() {
1206 if path.last().map(|(_, ln)| ln == "Description").unwrap_or(false) {
1208 inline_blank_node_stack.pop();
1209 if let Some(bn_props) = blank_node_props.get(&node_id) {
1210 let parent_uc = ucfirst(&strip_non_ascii(&parent_local));
1211 let parent_ns = path.iter().rev().nth(1)
1213 .map(|(ns, _)| ns.as_str())
1214 .unwrap_or("");
1215 let parent_prefix_ns = namespace_prefix(parent_ns);
1216 let parent_group = if parent_prefix_ns.is_empty() {
1217 "XMP"
1218 } else { parent_prefix_ns };
1219 for (prop_ns, prop_local, prop_val) in bn_props {
1220 let prop_prefix = namespace_prefix(prop_ns);
1221 let prop_group = if prop_prefix.is_empty() { parent_group } else { prop_prefix };
1222 let prop_cat = namespace_category(prop_group);
1223 let prop_uc = ucfirst(&strip_non_ascii(prop_local));
1224 let stripped = strip_struct_prefix(&parent_uc, &prop_uc);
1225 let flat_raw = format!("{}{}", parent_uc, stripped);
1226 let flat = apply_flat_name_remap(&flat_raw).to_string();
1227 if !tags.iter().any(|t| t.name == flat) {
1229 tags.push(Tag {
1230 id: TagId::Text(format!("{}:{}", prop_group, flat)),
1231 name: flat.clone(),
1232 description: flat,
1233 group: TagGroup {
1234 family0: "XMP".into(),
1235 family1: format!("XMP-{}", prop_group),
1236 family2: prop_cat.into(),
1237 },
1238 raw_value: Value::String(prop_val.clone()),
1239 print_value: prop_val.clone(),
1240 priority: 0,
1241 });
1242 }
1243 }
1244 }
1245 }
1246 }
1247 }
1248
1249 path.pop();
1250 current_text.clear();
1251 }
1252 Err(_) => continue,
1253 _ => {}
1254 }
1255 }
1256
1257 let has_gainmap = tags.iter().any(|t| {
1259 t.name == "DirectoryItemSemantic"
1260 && t.print_value.contains("GainMap")
1261 });
1262 if has_gainmap {
1263 let gainmap_mime = tags.iter()
1266 .find(|t| t.name == "DirectoryItemSemantic")
1267 .and_then(|t| {
1268 if let Value::List(ref items) = t.raw_value {
1271 items.iter().enumerate()
1272 .find(|(_, v)| v.to_display_string() == "GainMap")
1273 .map(|(i, _)| i)
1274 } else {
1275 None
1276 }
1277 })
1278 .and_then(|idx| {
1279 tags.iter()
1280 .find(|t| t.name == "DirectoryItemMime")
1281 .and_then(|t| match &t.raw_value {
1282 Value::List(items) => items.get(idx).map(|v| v.to_display_string()),
1283 Value::String(s) => if idx == 0 { Some(s.clone()) } else { None },
1284 _ => None,
1285 })
1286 })
1287 .unwrap_or_else(|| "image/jpeg".to_string());
1288
1289 let warning_msg = format!(
1290 "[minor] Error reading GainMap {} from trailer",
1291 gainmap_mime
1292 );
1293 tags.push(Tag {
1294 id: TagId::Text("Warning".into()),
1295 name: "Warning".into(),
1296 description: "Warning".into(),
1297 group: TagGroup {
1298 family0: "ExifTool".into(),
1299 family1: "ExifTool".into(),
1300 family2: "Other".into(),
1301 },
1302 raw_value: Value::String(warning_msg.clone()),
1303 print_value: warning_msg,
1304 priority: 0,
1305 });
1306 }
1307
1308 if !tags.iter().any(|t| t.name == "Flash") {
1311 let get_bool = |name: &str| -> Option<bool> {
1312 tags.iter().find(|t| t.name == name)
1313 .map(|t| t.print_value.to_lowercase() == "true")
1314 };
1315 let get_int = |name: &str| -> Option<u32> {
1316 tags.iter().find(|t| t.name == name)
1317 .and_then(|t| t.print_value.parse::<u32>().ok())
1318 };
1319 let flash_fired = get_bool("FlashFired");
1320 let flash_return = get_int("FlashReturn");
1321 let flash_mode = get_int("FlashMode");
1322 let flash_function = get_bool("FlashFunction");
1323 let flash_red_eye = get_bool("FlashRedEyeMode");
1324 if flash_fired.is_some() || flash_return.is_some() || flash_mode.is_some()
1326 || flash_function.is_some() || flash_red_eye.is_some()
1327 {
1328 let val: u32 =
1329 (if flash_fired.unwrap_or(false) { 0x01 } else { 0 }) |
1330 ((flash_return.unwrap_or(0) & 0x03) << 1) |
1331 ((flash_mode.unwrap_or(0) & 0x03) << 3) |
1332 (if flash_function.unwrap_or(false) { 0x20 } else { 0 }) |
1333 (if flash_red_eye.unwrap_or(false) { 0x40 } else { 0 });
1334 let flash_str = flash_numeric_to_string(val);
1335 tags.push(Tag {
1336 id: TagId::Text("Flash:Flash".into()),
1337 name: "Flash".into(),
1338 description: "Flash".into(),
1339 group: TagGroup {
1340 family0: "XMP".into(),
1341 family1: "XMP-exif".into(),
1342 family2: "Camera".into(),
1343 },
1344 raw_value: Value::String(format!("{}", val)),
1345 print_value: flash_str,
1346 priority: 0,
1347 });
1348 }
1349 }
1350
1351 let tags = aggregate_duplicate_xmp_tags(tags);
1356
1357 Ok(tags)
1358 }
1359}
1360
1361fn aggregate_duplicate_xmp_tags(tags: Vec<Tag>) -> Vec<Tag> {
1365 let mut result: Vec<Tag> = Vec::with_capacity(tags.len());
1366 let mut name_to_idx: std::collections::HashMap<String, usize> = std::collections::HashMap::new();
1367
1368 for tag in tags {
1369 if let Some(&idx) = name_to_idx.get(&tag.name) {
1370 let existing = &mut result[idx];
1372 if existing.print_value != tag.print_value {
1373 existing.print_value = format!("{}, {}", existing.print_value, tag.print_value);
1374 }
1375 } else {
1377 let idx = result.len();
1378 name_to_idx.insert(tag.name.clone(), idx);
1379 result.push(tag);
1380 }
1381 }
1382 result
1383}
1384
/// Converts a numeric flash value into its human-readable description.
///
/// Values that are not in the lookup table are rendered as
/// `Unknown (0x..)` with the value in lower-case hex, zero-padded to at least
/// two digits.
fn flash_numeric_to_string(val: u32) -> String {
    // (numeric value, description) pairs.
    const FLASH_LABELS: &[(u32, &str)] = &[
        (0x00, "No Flash"),
        (0x01, "Fired"),
        (0x05, "Fired, Return not detected"),
        (0x07, "Fired, Return detected"),
        (0x08, "On, Did not fire"),
        (0x09, "On, Fired"),
        (0x0d, "On, Return not detected"),
        (0x0f, "On, Return detected"),
        (0x10, "Off, Did not fire"),
        (0x14, "Off, Did not fire, Return not detected"),
        (0x18, "Auto, Did not fire"),
        (0x19, "Auto, Fired"),
        (0x1d, "Auto, Fired, Return not detected"),
        (0x1f, "Auto, Fired, Return detected"),
        (0x20, "No flash function"),
        (0x30, "Off, No flash function"),
        (0x41, "Fired, Red-eye reduction"),
        (0x45, "Fired, Red-eye reduction, Return not detected"),
        (0x47, "Fired, Red-eye reduction, Return detected"),
        (0x49, "On, Red-eye reduction"),
        (0x4d, "On, Red-eye reduction, Return not detected"),
        (0x4f, "On, Red-eye reduction, Return detected"),
        (0x50, "Off, Red-eye reduction"),
        (0x58, "Auto, Did not fire, Red-eye reduction"),
        (0x59, "Auto, Fired, Red-eye reduction"),
        (0x5d, "Auto, Fired, Red-eye reduction, Return not detected"),
        (0x5f, "Auto, Fired, Red-eye reduction, Return detected"),
    ];

    FLASH_LABELS
        .iter()
        .find(|&&(code, _)| code == val)
        .map(|&(_, label)| label.to_string())
        .unwrap_or_else(|| format!("Unknown (0x{:02x})", val))
}
1418
1419
1420fn build_struct_tag_prefix_without_last(path: &[(String, String)], exclude_ln: &str) -> String {
1423 build_struct_tag_prefix_excluding(path, Some(exclude_ln))
1424}
1425
1426fn build_struct_tag_prefix_excluding(path: &[(String, String)], exclude_last: Option<&str>) -> String {
1427 let rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
1428 let mut result = String::new();
1429 let effective_path: &[(String, String)] = if let Some(excl) = exclude_last {
1430 let mut end = path.len();
1432 for i in (0..path.len()).rev() {
1433 if path[i].1 == excl {
1434 end = i;
1435 break;
1436 }
1437 }
1438 &path[..end]
1439 } else {
1440 path
1441 };
1442 for (ns, ln) in effective_path {
1443 if ns == rdf_ns || ln == "Description" || ln == "RDF"
1444 || ln == "xmpmeta" || ln == "xapmeta"
1445 || ns == "adobe:ns:meta/"
1446 {
1447 continue;
1448 }
1449 if ln == "Seq" || ln == "Bag" || ln == "Alt" || ln == "li" {
1451 continue;
1452 }
1453 let part = ucfirst(&strip_non_ascii(ln));
1454 if result.is_empty() {
1455 result = part;
1456 } else {
1457 let stripped = strip_struct_prefix(&result, &part);
1459 result = format!("{}{}", result, stripped);
1460 }
1461 }
1462 result
1463}
1464
/// Removes from the start of `field` the part that repeats `parent`.
///
/// Two attempts are made, in order:
/// 1. If `field` starts with the whole of `parent` and is longer than it, the
///    remainder after `parent` is returned.
/// 2. Otherwise each suffix of `parent` that begins at an upper-case
///    character (excluding the first and the last character of `parent`) is
///    tried, left to right; the first one that `field` starts with, leaving a
///    non-empty remainder, is stripped.
///
/// If neither applies, `field` is returned unchanged.
fn strip_struct_prefix(parent: &str, field: &str) -> String {
    // Whole-parent prefix.
    if let Some(rest) = field.strip_prefix(parent) {
        if !rest.is_empty() {
            return rest.to_owned();
        }
    }

    // Suffixes of the parent that start on an upper-case character.
    let char_count = parent.chars().count();
    let remainder = parent
        .char_indices()
        .enumerate()
        .filter(|&(pos, (_, ch))| pos >= 1 && pos + 1 < char_count && ch.is_uppercase())
        .find_map(|(_, (byte_at, _))| {
            field
                .strip_prefix(&parent[byte_at..])
                .filter(|rest| !rest.is_empty())
        });

    match remainder {
        Some(rest) => rest.to_owned(),
        None => field.to_owned(),
    }
}
1498
1499fn remap_xmp_tag_name(group_prefix: &str, local_name: &str) -> String {
1501 let clean_name = strip_non_ascii(local_name);
1503 let local_name = clean_name.as_str();
1504 match (group_prefix, local_name) {
1505 ("tiff", "ImageLength") => "ImageHeight".into(),
1507 ("tiff", "BitsPerSample") => "BitsPerSample".into(),
1508 ("exif", "PixelXDimension") => "ExifImageWidth".into(),
1510 ("exif", "PixelYDimension") => "ExifImageHeight".into(),
1511 ("exif", "ExposureBiasValue") => "ExposureCompensation".into(),
1512 ("photoshop", "ICCProfile") => "ICCProfileName".into(),
1514 ("photoshop", "ColorMode") => "ColorMode".into(),
1515 ("plus", "Version") => "PLUSVersion".into(),
1517 _ => {
1518 let has_lowercase = local_name.chars().any(|c| c.is_lowercase());
1523 let has_uppercase = local_name.chars().any(|c| c.is_uppercase());
1524 if has_uppercase && !has_lowercase && local_name.len() > 1 {
1525 ucfirst(&local_name.to_lowercase())
1526 } else {
1527 ucfirst(local_name)
1528 }
1529 }
1530 }
1531}
1532
/// Replaces a flattened struct tag name with its shorter alias, if it has one.
///
/// Names starting with `RegionsRegionListExtensions` become
/// `RegionExtensions` followed by the (recursively remapped) remainder.
/// Otherwise the name is looked up in a fixed alias table; names without an
/// entry are returned unchanged.
fn apply_flat_name_remap(name: &str) -> String {
    // (flattened name, alias) pairs.
    const FLAT_NAME_ALIASES: &[(&str, &str)] = &[
        ("ArtworkOrObjectAOCopyrightNotice", "ArtworkCopyrightNotice"),
        ("ArtworkOrObjectAOCreator", "ArtworkCreator"),
        ("ArtworkOrObjectAODateCreated", "ArtworkDateCreated"),
        ("ArtworkOrObjectAOSource", "ArtworkSource"),
        ("ArtworkOrObjectAOSourceInvNo", "ArtworkSourceInventoryNo"),
        ("ArtworkOrObjectAOTitle", "ArtworkTitle"),
        ("ArtworkOrObjectAOCurrentCopyrightOwnerName", "ArtworkCopyrightOwnerName"),
        ("ArtworkOrObjectAOCurrentCopyrightOwnerId", "ArtworkCopyrightOwnerID"),
        ("ArtworkOrObjectAOCurrentLicensorName", "ArtworkLicensorName"),
        ("ArtworkOrObjectAOCurrentLicensorId", "ArtworkLicensorID"),
        ("ArtworkOrObjectAOCreatorId", "ArtworkCreatorID"),
        ("ArtworkOrObjectAOCircaDateCreated", "ArtworkCircaDateCreated"),
        ("ArtworkOrObjectAOStylePeriod", "ArtworkStylePeriod"),
        ("ArtworkOrObjectAOSourceInvURL", "ArtworkSourceInvURL"),
        ("ArtworkOrObjectAOContentDescription", "ArtworkContentDescription"),
        ("ArtworkOrObjectAOContributionDescription", "ArtworkContributionDescription"),
        ("ArtworkOrObjectAOPhysicalDescription", "ArtworkPhysicalDescription"),
        ("RegionsRegionListName", "RegionName"),
        ("RegionsRegionListType", "RegionType"),
        ("RegionsRegionListDescription", "RegionDescription"),
        ("RegionsRegionListFocusUsage", "RegionFocusUsage"),
        ("RegionsRegionListBarCodeValue", "RegionBarCodeValue"),
        ("RegionsRegionListSeeAlso", "RegionSeeAlso"),
        ("RegionsRegionListRotation", "RegionRotation"),
        ("RegionsRegionListAreaH", "RegionAreaH"),
        ("RegionsRegionListAreaW", "RegionAreaW"),
        ("RegionsRegionListAreaX", "RegionAreaX"),
        ("RegionsRegionListAreaY", "RegionAreaY"),
        ("RegionsRegionListAreaD", "RegionAreaD"),
        ("RegionsRegionListAreaUnit", "RegionAreaUnit"),
        ("KeywordsHierarchyKeyword", "HierarchicalKeywords1"),
        ("KeywordsHierarchyChildrenKeyword", "HierarchicalKeywords2"),
        ("KeywordsHierarchyChildrenChildrenKeyword", "HierarchicalKeywords3"),
        ("KeywordsHierarchyChildrenChildrenChildrenKeyword", "HierarchicalKeywords4"),
        ("KeywordsHierarchyChildrenChildrenChildrenChildrenKeyword", "HierarchicalKeywords5"),
        ("KeywordsHierarchyChildrenChildrenChildrenChildrenChildrenKeyword", "HierarchicalKeywords6"),
        ("ColorantsSwatchName", "ColorantSwatchName"),
        ("ColorantsMode", "ColorantMode"),
        ("ColorantsType", "ColorantType"),
        ("ColorantsCyan", "ColorantCyan"),
        ("ColorantsMagenta", "ColorantMagenta"),
        ("ColorantsYellow", "ColorantYellow"),
        ("ColorantsBlack", "ColorantBlack"),
        ("ColorantsGray", "ColorantGray"),
        ("ColorantsRed", "ColorantRed"),
        ("ColorantsGreen", "ColorantGreen"),
        ("ColorantsBlue", "ColorantBlue"),
        ("ColorantsL", "ColorantL"),
        ("ColorantsA", "ColorantA"),
        ("ColorantsB", "ColorantB"),
        ("FontsFontName", "FontName"),
        ("FontsFontFamily", "FontFamily"),
        ("FontsFontFace", "FontFace"),
        ("FontsFontType", "FontType"),
        ("FontsVersionString", "FontVersion"),
        ("FontsComposite", "FontComposite"),
        ("FontsFontFileName", "FontFileName"),
        ("ThumbnailsFormat", "ThumbnailFormat"),
        ("ThumbnailsWidth", "ThumbnailWidth"),
        ("ThumbnailsHeight", "ThumbnailHeight"),
        ("ThumbnailsImage", "ThumbnailImage"),
    ];

    // Region extension structs keep their inner name, remapped on its own.
    if let Some(rest) = name.strip_prefix("RegionsRegionListExtensions") {
        return format!("RegionExtensions{}", apply_flat_name_remap(rest));
    }

    FLAT_NAME_ALIASES
        .iter()
        .find(|&&(flat, _)| flat == name)
        .map_or(name, |&(_, alias)| alias)
        .to_string()
}
1619
1620fn parse_xmp_value(text: &str) -> Value {
1624 if let Some(slash) = text.find('/') {
1626 let num_str = &text[..slash];
1627 let den_str = &text[slash+1..];
1628 if !num_str.is_empty() && !den_str.is_empty()
1629 && !num_str.contains(' ') && !den_str.contains(' ')
1630 {
1631 if let (Ok(n), Ok(d)) = (num_str.parse::<i64>(), den_str.parse::<u64>()) {
1632 if d > 0 {
1633 if n >= 0 {
1634 return Value::URational(n as u32, d as u32);
1635 } else {
1636 return Value::IRational(n as i32, d as i32);
1637 }
1638 }
1639 }
1640 }
1641 }
1642 Value::String(text.to_string())
1643}
1644
/// Returns `s` with its first character converted to upper case.
///
/// The rest of the string is copied unchanged; an empty input gives an empty
/// string. The upper-case form of the first character may be more than one
/// character long.
fn ucfirst(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s.chars();
    if let Some(head) = rest.next() {
        out.extend(head.to_uppercase());
        out.push_str(rest.as_str());
    }
    out
}
1652
/// Returns a copy of `s` containing only its ASCII characters, in order.
fn strip_non_ascii(s: &str) -> String {
    let mut ascii_only = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch.is_ascii() {
            ascii_only.push(ch);
        }
    }
    ascii_only
}
1661
/// Converts an XML element or attribute name to upper camel case.
///
/// Names that contain `_`, or whose alphabetic characters are all upper case,
/// are lower-cased and split at underscores; each piece then has its first
/// character upper-cased and the pieces are joined without a separator.
/// Any other name only has its first character upper-cased.
fn xml_elem_to_camel(s: &str) -> String {
    let has_underscore = s.contains('_');
    let no_lowercase_letters = s.chars().all(|c| !c.is_alphabetic() || c.is_uppercase());

    if !(has_underscore || no_lowercase_letters) {
        // Mixed-case name: keep the interior casing as written.
        let mut letters = s.chars();
        return match letters.next() {
            Some(first) => {
                let mut out: String = first.to_uppercase().collect();
                out.push_str(letters.as_str());
                out
            }
            None => String::new(),
        };
    }

    let lowered = s.to_lowercase();
    let mut out = String::with_capacity(lowered.len());
    for word in lowered.split('_') {
        let mut letters = word.chars();
        if let Some(first) = letters.next() {
            out.extend(first.to_uppercase());
            out.push_str(letters.as_str());
        }
    }
    out
}
1694
/// Trims XML character data and, for multi-line text, collapses whitespace.
///
/// Leading and trailing whitespace is always removed. If the trimmed text
/// contains no line break (`\n` or `\r`) it is returned as is, interior
/// spacing included. Otherwise every run of whitespace is replaced by a
/// single space.
fn normalize_xml_text(s: &str) -> String {
    let body = s.trim();
    let is_multiline = body.contains('\n') || body.contains('\r');
    if is_multiline {
        body.split_whitespace().collect::<Vec<&str>>().join(" ")
    } else {
        body.to_string()
    }
}
1720
1721fn read_generic_xml(xml: &str) -> Result<Vec<Tag>> {
1724 use xml::reader::{EventReader, XmlEvent};
1725 let mut tags = Vec::new();
1726 let mut seen_names: std::collections::HashSet<String> = std::collections::HashSet::new();
1727
1728 let parser = EventReader::from_str(xml);
1729 let mut path: Vec<String> = Vec::new(); let mut current_text = String::new();
1731 let mut has_children: Vec<bool> = Vec::new();
1734
1735 for event in parser {
1742 match event {
1743 Ok(XmlEvent::StartElement { name, attributes, namespace, .. }) => {
1744 let local = xml_elem_to_camel(&name.local_name);
1745 let path_str = format!("{}{}", path.join(""), local);
1746 if let Some(last) = has_children.last_mut() {
1748 *last = true;
1749 }
1750 path.push(local.clone());
1751 has_children.push(false);
1752 current_text.clear();
1753
1754 if path.len() == 1 {
1768 if let Some(default_ns) = namespace.get("") {
1770 let tag_name = format!("{}Xmlns", local);
1772 if !seen_names.contains(&tag_name) {
1773 seen_names.insert(tag_name.clone());
1774 let val = Value::String(default_ns.to_string());
1775 let pv = val.to_display_string();
1776 tags.push(Tag {
1777 id: TagId::Text(format!("XMP:{}", tag_name)),
1778 name: tag_name.clone(), description: tag_name,
1779 group: TagGroup { family0: "XMP".into(), family1: "XMP".into(), family2: "Other".into() },
1780 raw_value: val, print_value: pv, priority: 0,
1781 });
1782 }
1783 }
1784 }
1785
1786 for attr in &attributes {
1788 let aname = &attr.name;
1789 if aname.prefix.as_deref() == Some("xmlns")
1790 || aname.local_name == "xmlns"
1791 || aname.local_name.starts_with("xmlns:")
1792 {
1793 continue;
1795 }
1796 let attr_local = xml_elem_to_camel(&aname.local_name);
1798 let tag_name = format!("{}{}", path_str, attr_local);
1799 if !seen_names.contains(&tag_name) {
1800 seen_names.insert(tag_name.clone());
1801 let attr_ns = aname.namespace.as_deref().unwrap_or("");
1803 let pfx = namespace_prefix(attr_ns);
1804 let group_pfx = if pfx.is_empty() {
1805 aname.prefix.as_deref().unwrap_or("XMP")
1806 } else { pfx };
1807 let attr_val = normalize_xml_text(&attr.value);
1809 let val = Value::String(attr_val.clone());
1810 let pv = val.to_display_string();
1811 tags.push(Tag {
1812 id: TagId::Text(format!("XMP:{}", tag_name)),
1813 name: tag_name.clone(), description: tag_name,
1814 group: TagGroup { family0: "XMP".into(), family1: format!("XMP-{}", group_pfx), family2: "Other".into() },
1815 raw_value: val, print_value: pv, priority: 0,
1816 });
1817 }
1818 }
1819 }
1820 Ok(XmlEvent::Characters(text)) | Ok(XmlEvent::CData(text)) => {
1821 current_text.push_str(&text);
1822 }
1823 Ok(XmlEvent::EndElement { .. }) => {
1824 let text = normalize_xml_text(¤t_text);
1825 let is_leaf = has_children.last().copied().unwrap_or(false) == false;
1826 if (is_leaf || !text.is_empty()) && !path.is_empty() {
1828 let tag_name = path.join("");
1829 if !seen_names.contains(&tag_name) {
1830 seen_names.insert(tag_name.clone());
1831 let val = Value::String(text.clone());
1832 let pv = val.to_display_string();
1833 tags.push(Tag {
1834 id: TagId::Text(format!("XMP:{}", tag_name)),
1835 name: tag_name.clone(), description: tag_name,
1836 group: TagGroup { family0: "XMP".into(), family1: "XMP".into(), family2: "Other".into() },
1837 raw_value: val, print_value: pv, priority: 0,
1838 });
1839 }
1840 }
1841 current_text.clear();
1842 has_children.pop();
1843 path.pop();
1844 }
1845 Err(_) => continue,
1846 _ => {}
1847 }
1848 }
1849 Ok(tags)
1850}
1851
/// Removes closing tags that do not match any currently open element.
///
/// The input is scanned for `<…>` constructs while a stack of open element
/// names is maintained:
///
/// * an opening tag (not self-closing) is copied and its name pushed;
/// * a closing tag whose name is on the stack is copied, and the stack is
///   unwound down to and including the nearest matching entry;
/// * a closing tag whose name is not on the stack is dropped;
/// * comments, CDATA sections, processing instructions and `<!…>`
///   declarations are copied verbatim up to their own terminator
///   (`-->`, `]]>`, `?>` and `>` respectively);
/// * a `<` with no terminator after it is copied as a literal character.
///
/// All other text is copied unchanged.
fn fix_malformed_xml(xml: &str) -> String {
    let mut stack: Vec<String> = Vec::new();
    let mut result = String::with_capacity(xml.len());
    let mut pos = 0;

    while pos < xml.len() {
        let lt = match xml[pos..].find('<') {
            Some(rel) => pos + rel,
            None => {
                // No more markup: copy the tail and finish.
                result.push_str(&xml[pos..]);
                break;
            }
        };
        // Text before the next '<' is passed through untouched.
        result.push_str(&xml[pos..lt]);
        pos = lt;

        let rest = &xml[pos..];
        if rest.starts_with("</") {
            match rest.find('>') {
                Some(gt_rel) => {
                    let tag_name = rest[2..gt_rel].trim();
                    let gt = pos + gt_rel;
                    // Nearest open element with this name, if any.
                    if let Some(depth) = stack.iter().rposition(|open| open == tag_name) {
                        stack.truncate(depth);
                        result.push_str(&xml[pos..=gt]);
                    }
                    // An unmatched closing tag is skipped without being copied.
                    pos = gt + 1;
                }
                None => {
                    result.push('<');
                    pos += 1;
                }
            }
        } else if rest.starts_with("<!") || rest.starts_with("<?") {
            let end = if rest.starts_with("<!--") {
                rest.find("-->").map(|e| pos + e + 3)
            } else if rest.starts_with("<![CDATA[") {
                rest.find("]]>").map(|e| pos + e + 3)
            } else if rest.starts_with("<?") {
                rest.find("?>").map(|e| pos + e + 2)
            } else {
                // Other declarations (e.g. `<!DOCTYPE …>`) end at '>' — not at
                // the `?>` of some later processing instruction.
                rest.find('>').map(|e| pos + e + 1)
            };
            match end {
                Some(end_pos) => {
                    result.push_str(&xml[pos..end_pos]);
                    pos = end_pos;
                }
                None => {
                    result.push('<');
                    pos += 1;
                }
            }
        } else {
            match rest.find('>') {
                Some(gt_rel) => {
                    let gt = pos + gt_rel;
                    let inner = rest[1..gt_rel].trim();
                    let is_self_closing = inner.ends_with('/');
                    if !is_self_closing {
                        // The element name ends at the first whitespace or '/'.
                        let tag_name = inner
                            .split(|c: char| c.is_whitespace() || c == '/')
                            .next()
                            .unwrap_or("");
                        if !tag_name.is_empty() {
                            stack.push(tag_name.to_string());
                        }
                    }
                    result.push_str(&xml[pos..=gt]);
                    pos = gt + 1;
                }
                None => {
                    result.push('<');
                    pos += 1;
                }
            }
        }
    }
    result
}
1937
/// Replaces characters that are not legal in XML with a space, but only
/// inside processing instructions (`<? … ?>`).
///
/// Tab, line feed, carriage return and the ranges U+0020–U+D7FF,
/// U+E000–U+FFFD and U+10000–U+10FFFF are kept; any other character found
/// between `<?` and the next `?>` becomes `' '`. Text outside processing
/// instructions is copied unchanged.
fn sanitize_xmp_xml(xml: &str) -> String {
    let mut cleaned = String::with_capacity(xml.len());
    let mut inside_pi = false;
    let mut stream = xml.chars().peekable();

    while let Some(ch) = stream.next() {
        if !inside_pi {
            // "<?" opens a processing instruction; the '?' is handled next turn.
            if ch == '<' && stream.peek() == Some(&'?') {
                inside_pi = true;
            }
            cleaned.push(ch);
        } else if ch == '?' && stream.peek() == Some(&'>') {
            // "?>" closes it; consume both characters at once.
            inside_pi = false;
            cleaned.push('?');
            cleaned.push('>');
            stream.next();
        } else {
            let is_legal = matches!(
                ch,
                '\t' | '\n'
                    | '\r'
                    | '\u{20}'..='\u{D7FF}'
                    | '\u{E000}'..='\u{FFFD}'
                    | '\u{10000}'..='\u{10FFFF}'
            );
            cleaned.push(if is_legal { ch } else { ' ' });
        }
    }
    cleaned
}
1972
1973fn collect_node_bag_values(xml: &str) -> std::collections::HashMap<String, Vec<String>> {
1976 use xml::reader::{EventReader, XmlEvent};
1977 let mut map: std::collections::HashMap<String, Vec<String>> = std::collections::HashMap::new();
1978
1979 let parser = EventReader::from_str(xml);
1980 let mut current_node_id: Option<String> = None;
1981 let mut current_items: Vec<String> = Vec::new();
1982 let mut in_li = false;
1983 let mut current_text = String::new();
1984 let rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
1985
1986 for event in parser {
1987 match event {
1988 Ok(XmlEvent::StartElement { name, attributes, .. }) => {
1989 current_text.clear();
1990 let local = &name.local_name;
1991 let ns = name.namespace.as_deref().unwrap_or("");
1992
1993 if (local == "Bag" || local == "Seq" || local == "Alt") && ns == rdf_ns {
1994 if let Some(nid) = attributes.iter().find(|a| {
1996 a.name.local_name == "nodeID"
1997 && (a.name.prefix.as_deref() == Some("rdf") || ns == rdf_ns)
1998 }) {
1999 current_node_id = Some(nid.value.clone());
2000 current_items.clear();
2001 }
2002 } else if local == "li" && ns == rdf_ns && current_node_id.is_some() {
2003 in_li = true;
2004 }
2005 }
2006 Ok(XmlEvent::Characters(text)) | Ok(XmlEvent::CData(text)) => {
2007 if in_li {
2008 current_text.push_str(&text);
2009 }
2010 }
2011 Ok(XmlEvent::EndElement { name }) => {
2012 let local = &name.local_name;
2013 let ns = name.namespace.as_deref().unwrap_or("");
2014 if local == "li" && ns == rdf_ns && in_li {
2015 let val = normalize_xml_text(¤t_text);
2016 if !val.is_empty() {
2017 current_items.push(val);
2018 }
2019 in_li = false;
2020 current_text.clear();
2021 } else if (local == "Bag" || local == "Seq" || local == "Alt") && ns == rdf_ns {
2022 if let Some(nid) = current_node_id.take() {
2023 map.insert(nid, std::mem::take(&mut current_items));
2024 }
2025 }
2026 }
2027 Err(_) => continue,
2028 _ => {}
2029 }
2030 }
2031 map
2032}
2033
2034fn collect_inline_referenced_node_ids(xml: &str) -> std::collections::HashSet<String> {
2039 use xml::reader::{EventReader, XmlEvent};
2040 let mut set: std::collections::HashSet<String> = std::collections::HashSet::new();
2041 let parser = EventReader::from_str(xml);
2042 let rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
2043 let mut path: Vec<(String, String)> = Vec::new();
2045
2046 for event in parser {
2047 match event {
2048 Ok(XmlEvent::StartElement { name, attributes, .. }) => {
2049 let local = name.local_name.clone();
2050 let ns = name.namespace.as_deref().unwrap_or("").to_string();
2051
2052 if local == "Description" && ns == rdf_ns {
2054 if let Some(nid_attr) = attributes.iter().find(|a| {
2055 a.name.local_name == "nodeID"
2056 && (a.name.prefix.as_deref() == Some("rdf")
2057 || a.name.namespace.as_deref() == Some(rdf_ns))
2058 }) {
2059 let parent_is_top_level = path.last().map(|(pns, pln)| {
2063 (pln == "RDF" && pns == rdf_ns)
2064 || pln == "xmpmeta"
2065 || pln == "xapmeta"
2066 }).unwrap_or(true);
2067
2068 if !parent_is_top_level {
2069 set.insert(nid_attr.value.clone());
2070 }
2071 }
2072 }
2073
2074 path.push((ns, local));
2075 }
2076 Ok(XmlEvent::EndElement { .. }) => {
2077 path.pop();
2078 }
2079 Err(_) => continue,
2080 _ => {}
2081 }
2082 }
2083 set
2084}
2085
2086fn collect_blank_node_properties(xml: &str) -> std::collections::HashMap<String, Vec<(String, String, String)>> {
2091 use xml::reader::{EventReader, XmlEvent};
2092 let mut map: std::collections::HashMap<String, Vec<(String, String, String)>> = std::collections::HashMap::new();
2093 let parser = EventReader::from_str(xml);
2094 let mut current_node_id: Option<String> = None;
2095 let mut current_text = String::new();
2096 let mut in_property = false;
2097 let mut current_prop_ns = String::new();
2098 let mut current_prop_local = String::new();
2099 let rdf_ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
2100
2101 for event in parser {
2102 match event {
2103 Ok(XmlEvent::StartElement { name, attributes, .. }) => {
2104 current_text.clear();
2105 let local = &name.local_name;
2106 let ns = name.namespace.as_deref().unwrap_or("");
2107
2108 if local == "Description" && ns == rdf_ns {
2109 if let Some(nid_attr) = attributes.iter().find(|a| {
2111 a.name.local_name == "nodeID"
2112 && (a.name.prefix.as_deref() == Some("rdf") || a.name.namespace.as_deref() == Some(rdf_ns))
2113 }) {
2114 let nid = nid_attr.value.clone();
2115 current_node_id = Some(nid.clone());
2116 let entry = map.entry(nid).or_default();
2118 for attr in &attributes {
2119 if attr.name.local_name == "nodeID" || attr.name.local_name == "about" { continue; }
2120 if attr.name.prefix.as_deref() == Some("xmlns") { continue; }
2121 if attr.name.local_name.starts_with("xmlns") { continue; }
2122 if attr.name.prefix.as_deref() == Some("rdf") || attr.name.namespace.as_deref() == Some(rdf_ns) { continue; }
2123 let attr_ns = attr.name.namespace.as_deref().unwrap_or("").to_string();
2124 entry.push((attr_ns, attr.name.local_name.clone(), attr.value.clone()));
2125 }
2126 }
2127 } else if current_node_id.is_some() && ns != rdf_ns && local != "RDF" {
2128 in_property = true;
2130 current_prop_ns = ns.to_string();
2131 current_prop_local = local.clone();
2132 if let Some(res_attr) = attributes.iter().find(|a| {
2134 a.name.local_name == "resource" && a.name.namespace.as_deref() == Some(rdf_ns)
2135 }) {
2136 let nid = current_node_id.as_ref().unwrap().clone();
2137 let entry = map.entry(nid).or_default();
2138 entry.push((ns.to_string(), local.clone(), res_attr.value.clone()));
2139 in_property = false; }
2141 }
2142 }
2143 Ok(XmlEvent::Characters(text)) | Ok(XmlEvent::CData(text)) => {
2144 if in_property {
2145 current_text.push_str(&text);
2146 }
2147 }
2148 Ok(XmlEvent::EndElement { name }) => {
2149 let local = &name.local_name;
2150 let ns = name.namespace.as_deref().unwrap_or("");
2151 if local == "Description" && ns == rdf_ns {
2152 current_node_id = None;
2153 in_property = false;
2154 } else if in_property {
2155 let text = normalize_xml_text(¤t_text);
2157 if let Some(nid) = ¤t_node_id {
2158 let entry = map.entry(nid.clone()).or_default();
2159 entry.push((current_prop_ns.clone(), current_prop_local.clone(), text));
2160 }
2161 in_property = false;
2162 current_text.clear();
2163 }
2164 }
2165 Err(_) => continue,
2166 _ => {}
2167 }
2168 }
2169 map
2170}