1use crate::types::{TagEntry, TagValue};
7use anyhow::{Context, Result};
8use quick_xml::events::{BytesStart, Event};
9use quick_xml::name::{Namespace, ResolveResult};
10use quick_xml::reader::NsReader;
11use std::collections::HashMap;
12
13use crate::generated::XMP_pm::NAMESPACE_URIS;
15
/// Parser that converts a raw XMP packet into a nested [`TagValue`] structure.
///
/// Properties are grouped under the short prefix registered for their
/// namespace URI (see `uri_to_prefix`).
#[derive(Debug)]
pub struct XmpProcessor {
    /// Namespace URI -> short prefix used as the grouping key in the output.
    /// Filled once by `new()` and only read afterwards.
    uri_to_prefix: HashMap<String, String>,
    /// `xmlns:` prefix -> URI declarations collected from the document that is
    /// currently being parsed; cleared at the start of every parse.
    current_ns_map: HashMap<String, String>,
}
23
24impl Default for XmpProcessor {
25 fn default() -> Self {
26 Self::new()
27 }
28}
29
30impl XmpProcessor {
31 pub fn new() -> Self {
33 let mut uri_to_prefix = HashMap::new();
36
37 uri_to_prefix.insert("http://ns.exiftool.ca/1.0/".to_string(), "et".to_string());
39 uri_to_prefix.insert("http://ns.exiftool.org/1.0/".to_string(), "et".to_string());
40
41 for (prefix, uri) in NAMESPACE_URIS.iter() {
43 uri_to_prefix.insert(uri.to_string(), prefix.to_string());
44 }
45
46 Self {
47 uri_to_prefix,
48 current_ns_map: HashMap::new(),
49 }
50 }
51
52 pub fn process_xmp_data(&mut self, data: &[u8]) -> Result<TagEntry> {
57 let processed_data = self.strip_bom(data);
59
60 let xmp_str =
62 std::str::from_utf8(&processed_data).context("XMP data is not valid UTF-8")?;
63
64 let xmp_structure = self.parse_xmp_xml(xmp_str)?;
66
67 Ok(TagEntry {
69 group: "XMP".to_string(),
70 group1: "XMP".to_string(),
71 name: "XMP".to_string(),
72 value: TagValue::Object(xmp_structure.clone()),
73 print: TagValue::Object(xmp_structure),
74 })
75 }
76
77 fn strip_bom<'a>(&self, data: &'a [u8]) -> std::borrow::Cow<'a, [u8]> {
79 use std::borrow::Cow;
80
81 if data.starts_with(b"\xEF\xBB\xBF") {
83 return Cow::Borrowed(&data[3..]);
84 }
85
86 if data.starts_with(b"\xFE\xFF") {
88 return Cow::Owned(self.convert_utf16_be_to_utf8(&data[2..]));
89 }
90
91 if data.starts_with(b"\xFF\xFE") {
93 return Cow::Owned(self.convert_utf16_le_to_utf8(&data[2..]));
94 }
95
96 if data.len() >= 4 && data[0] == b'<' && data[1] == 0 {
98 return Cow::Owned(self.convert_utf16_le_to_utf8(data));
99 }
100
101 if data.len() >= 4 && data[0] == 0 && data[1] == b'<' {
103 return Cow::Owned(self.convert_utf16_be_to_utf8(data));
104 }
105
106 Cow::Borrowed(data)
107 }
108
109 fn convert_utf16_le_to_utf8(&self, data: &[u8]) -> Vec<u8> {
111 let utf16_chars: Vec<u16> = data
113 .chunks_exact(2)
114 .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
115 .collect();
116
117 String::from_utf16_lossy(&utf16_chars).into_bytes()
119 }
120
121 fn convert_utf16_be_to_utf8(&self, data: &[u8]) -> Vec<u8> {
123 let utf16_chars: Vec<u16> = data
125 .chunks_exact(2)
126 .map(|chunk| u16::from_be_bytes([chunk[0], chunk[1]]))
127 .collect();
128
129 String::from_utf16_lossy(&utf16_chars).into_bytes()
131 }
132
133 fn parse_xmp_xml(&mut self, xml: &str) -> Result<HashMap<String, TagValue>> {
135 let mut reader = NsReader::from_str(xml);
136 reader.config_mut().trim_text(true);
137
138 let mut buf = Vec::new();
139 let mut root_object = HashMap::new();
140 let mut namespace_objects: HashMap<String, HashMap<String, TagValue>> = HashMap::new();
141
142 self.current_ns_map.clear();
144
145 let mut element_stack: Vec<ElementContext> = Vec::new();
147
148 loop {
149 match reader.read_resolved_event_into(&mut buf) {
150 Ok((ns_result, Event::Start(e))) => {
151 let element_local_name = e.local_name();
152 let local_name = std::str::from_utf8(element_local_name.as_ref())
153 .context("Invalid UTF-8 in element name")?;
154
155 let namespace_uri = match ns_result {
157 ResolveResult::Bound(Namespace(ns_bytes)) => Some(
158 std::str::from_utf8(ns_bytes)
159 .context("Invalid UTF-8 in namespace URI")?
160 .to_string(),
161 ),
162 _ => None,
163 };
164
165 self.process_start_element(
167 &e,
168 local_name,
169 namespace_uri.as_deref(),
170 &reader,
171 &mut element_stack,
172 &mut namespace_objects,
173 )?;
174 }
175 Ok((_, Event::Text(e))) => {
176 let text = e.decode()?.into_owned();
177 if !text.trim().is_empty() {
178 self.process_text_content(
179 text,
180 &mut element_stack,
181 &mut namespace_objects,
182 )?;
183 }
184 }
185 Ok((_, Event::End(_))) => {
186 if !element_stack.is_empty() {
187 element_stack.pop();
188 }
189 }
190 Ok((_, Event::Eof)) => break,
191 Err(e) => return Err(anyhow::anyhow!("XML parsing error: {}", e)),
192 _ => {} }
194 buf.clear();
195 }
196
197 for (ns_prefix, properties) in namespace_objects {
199 if !properties.is_empty() {
200 root_object.insert(ns_prefix, TagValue::Object(properties));
201 }
202 }
203
204 Ok(root_object)
205 }
206
207 fn process_start_element(
209 &mut self,
210 element: &BytesStart,
211 local_name: &str,
212 namespace_uri: Option<&str>,
213 reader: &NsReader<&[u8]>,
214 element_stack: &mut Vec<ElementContext>,
215 _namespace_objects: &mut HashMap<String, HashMap<String, TagValue>>,
216 ) -> Result<()> {
217 for attr in element.attributes() {
219 let attr = attr?;
220 let key = std::str::from_utf8(attr.key.as_ref())?;
221
222 if let Some(prefix) = key.strip_prefix("xmlns:") {
224 let uri = std::str::from_utf8(&attr.value)?;
225 self.current_ns_map
227 .insert(prefix.to_string(), uri.to_string());
228 }
229 }
230
231 let container_type = match local_name {
233 "Bag" => Some(RdfContainerType::Bag),
234 "Seq" => Some(RdfContainerType::Seq),
235 "Alt" => Some(RdfContainerType::Alt),
236 _ => None,
237 };
238
239 let mut lang_attr = None;
241 for attr in element.attributes() {
242 let attr = attr?;
243 let (_, attr_local) = reader.resolve_attribute(attr.key);
244 let attr_name = std::str::from_utf8(attr_local.as_ref())?;
245
246 if attr_name == "lang" {
247 lang_attr = Some(std::str::from_utf8(&attr.value)?.to_string());
248 }
249 }
250
251 let property_ns = if let Some(uri) = namespace_uri {
253 self.get_namespace_prefix(uri)
254 } else {
255 None
256 };
257
258 let context = ElementContext {
260 local_name: local_name.to_string(),
261 namespace_prefix: property_ns,
262 container_type,
263 language: lang_attr,
264 is_rdf_li: local_name == "li",
265 };
266
267 element_stack.push(context);
268
269 Ok(())
270 }
271
272 fn process_text_content(
274 &self,
275 text: String,
276 element_stack: &mut [ElementContext],
277 namespace_objects: &mut HashMap<String, HashMap<String, TagValue>>,
278 ) -> Result<()> {
279 if element_stack.len() < 2 {
280 return Ok(()); }
282
283 let mut property_element = None;
286 let mut container_element = None;
287
288 for i in (0..element_stack.len()).rev() {
290 let elem = &element_stack[i];
291
292 if elem.local_name == "li"
294 || elem.local_name == "Description"
295 || elem.local_name == "RDF"
296 {
297 continue;
298 }
299
300 if elem.container_type.is_some() {
302 container_element = Some(elem);
303 if i > 0 {
305 let prev = &element_stack[i - 1];
306 if prev.namespace_prefix.is_some() && prev.container_type.is_none() {
307 property_element = Some(prev);
308 break;
309 }
310 }
311 } else if elem.namespace_prefix.is_some() && property_element.is_none() {
312 property_element = Some(elem);
314 if container_element.is_some() {
315 break;
316 }
317 }
318 }
319
320 if let Some(prop) = property_element {
322 if let Some(ns) = &prop.namespace_prefix {
323 let ns_object = namespace_objects.entry(ns.clone()).or_default();
324
325 let property_name = prop.local_name.clone();
326
327 if let Some(container) = container_element {
329 match container.container_type {
330 Some(RdfContainerType::Bag) | Some(RdfContainerType::Seq) => {
331 let array = ns_object
333 .entry(property_name)
334 .or_insert_with(|| TagValue::Array(Vec::new()));
335
336 if let Some(arr) = array.as_array_mut() {
337 arr.push(TagValue::string(text));
338 }
339 }
340 Some(RdfContainerType::Alt) => {
341 let alt_object = ns_object
343 .entry(property_name)
344 .or_insert_with(|| TagValue::Object(HashMap::new()));
345
346 if let Some(obj) = alt_object.as_object_mut() {
347 let current = &element_stack[element_stack.len() - 1];
348 let lang_key = current.language.as_deref().unwrap_or("x-default");
349 obj.insert(lang_key.to_string(), TagValue::string(text));
350 }
351 }
352 None => {
353 }
355 }
356 } else {
357 ns_object.insert(property_name, TagValue::string(text));
359 }
360 }
361 }
362
363 Ok(())
364 }
365
366 fn get_namespace_prefix(&self, uri: &str) -> Option<String> {
369 self.uri_to_prefix.get(uri).cloned()
372 }
373
374 #[allow(dead_code)]
377 fn extract_prefix_from_uri(&self, uri: &str) -> String {
378 if uri.contains("/dc/") {
380 return "dc".to_string();
381 }
382 if uri.contains("/xmp/") || uri.contains("/xap/") {
383 return "xmp".to_string();
384 }
385 if uri.contains("/exif/") {
386 return "exif".to_string();
387 }
388 if uri.contains("/tiff/") {
389 return "tiff".to_string();
390 }
391 if uri.contains("/photoshop/") {
392 return "photoshop".to_string();
393 }
394 if uri.contains("/crs/") {
395 return "crs".to_string();
396 }
397
398 uri.trim_end_matches('/')
400 .split('/')
401 .next_back()
402 .unwrap_or("unknown")
403 .split('#')
404 .next_back()
405 .unwrap_or("unknown")
406 .to_string()
407 }
408}
409
410#[derive(Debug)]
412struct ElementContext {
413 local_name: String,
414 namespace_prefix: Option<String>,
415 container_type: Option<RdfContainerType>,
416 language: Option<String>,
417 #[allow(dead_code)]
419 is_rdf_li: bool,
420}
421
/// The RDF container element names recognised by the parser.
#[derive(Debug, Clone, Copy)]
enum RdfContainerType {
    /// `Bag` container; items are collected into an array.
    Bag,
    /// `Seq` container; items are collected into an array.
    Seq,
    /// `Alt` container; items are stored keyed by language.
    Alt,
}
429
430#[cfg(test)]
431mod tests {
432 use super::*;
433
434 #[test]
435 fn test_minimal_xmp() {
436 let xmp_data = r#"<?xml version="1.0"?>
437<x:xmpmeta xmlns:x="adobe:ns:meta/">
438 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
439 <rdf:Description xmlns:dc="http://purl.org/dc/elements/1.1/">
440 <dc:title>Test Title</dc:title>
441 </rdf:Description>
442 </rdf:RDF>
443</x:xmpmeta>"#;
444
445 let mut processor = XmpProcessor::new();
446 let result = processor.process_xmp_data(xmp_data.as_bytes()).unwrap();
447
448 assert_eq!(result.name, "XMP");
449 if let TagValue::Object(xmp) = &result.value {
450 eprintln!("Minimal XMP keys: {:?}", xmp.keys().collect::<Vec<_>>());
451 for (key, value) in xmp {
452 eprintln!(" {key}: {value:?}");
453 }
454 }
455 }
456
457 #[test]
458 fn test_simple_xmp_parsing() {
459 let xmp_data = r#"<?xml version="1.0"?>
460<x:xmpmeta xmlns:x="adobe:ns:meta/">
461 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
462 <rdf:Description xmlns:dc="http://purl.org/dc/elements/1.1/">
463 <dc:creator>
464 <rdf:Seq>
465 <rdf:li>John Doe</rdf:li>
466 <rdf:li>Jane Smith</rdf:li>
467 </rdf:Seq>
468 </dc:creator>
469 <dc:title>
470 <rdf:Alt>
471 <rdf:li xml:lang="x-default">Test Photo</rdf:li>
472 <rdf:li xml:lang="en-US">Test Photo</rdf:li>
473 </rdf:Alt>
474 </dc:title>
475 </rdf:Description>
476 </rdf:RDF>
477</x:xmpmeta>"#;
478
479 let mut processor = XmpProcessor::new();
480 let result = processor.process_xmp_data(xmp_data.as_bytes()).unwrap();
481
482 assert_eq!(result.name, "XMP");
483 assert!(matches!(result.value, TagValue::Object(_)));
484
485 if let TagValue::Object(xmp) = &result.value {
487 eprintln!("XMP structure keys: {:?}", xmp.keys().collect::<Vec<_>>());
489 for (key, value) in xmp {
490 eprintln!(" {key}: {value:?}");
491 }
492
493 assert!(!xmp.is_empty(), "XMP structure should not be empty");
495
496 if let Some(TagValue::Object(dc)) = xmp.get("dc") {
500 if let Some(TagValue::Array(creators)) = dc.get("creator") {
502 assert_eq!(creators.len(), 2);
503 assert_eq!(creators[0].as_string(), Some("John Doe"));
504 assert_eq!(creators[1].as_string(), Some("Jane Smith"));
505 }
506
507 if let Some(TagValue::Object(titles)) = dc.get("title") {
509 assert_eq!(
510 titles.get("x-default").unwrap().as_string(),
511 Some("Test Photo")
512 );
513 assert_eq!(titles.get("en-US").unwrap().as_string(), Some("Test Photo"));
514 }
515 }
516 }
517 }
518}