1use crate::error::ParseError;
8use crate::parser::namespace_detector::NamespaceContext;
9use ddex_core::models::{AttributeMap, AttributeType, AttributeValue, QName};
10use indexmap::IndexMap;
11use quick_xml::events::{attributes::Attribute, BytesStart};
12use std::collections::HashMap;
13use tracing::{debug, warn};
14
15#[derive(Debug, Clone)]
17pub struct AttributeExtractor {
18 ddex_attribute_types: HashMap<String, AttributeType>,
20 special_attributes: IndexMap<String, SpecialAttributeHandler>,
22}
23
/// Selects the extra processing applied to an attribute that is registered
/// as "special" by the extractor.
///
/// The enum carries no data, so it is `Copy` and comparable; this lets
/// callers pass handlers by value and compare them directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SpecialAttributeHandler {
    /// Handler registered for `xsi:type`.
    XsiType,
    /// Handler registered for `xsi:schemaLocation`.
    XsiSchemaLocation,
    /// Handler registered for `xsi:noNamespaceSchemaLocation`.
    XsiNoNamespaceSchemaLocation,
    /// Handler registered for `xsi:nil`.
    XsiNil,
    /// Handler registered for the bare `xmlns` attribute.
    NamespaceDeclaration,
    /// Handler registered for `LanguageAndScriptCode` and
    /// `ApplicableTerritoryCode`.
    LanguageAndTerritory,
    /// Handler registered for `SequenceNumber`.
    SequenceNumber,
    /// Handler registered for boolean flags (`IsDefault`, `IsMainArtist`).
    BooleanFlag,
}
44
45#[derive(Debug, Clone)]
47pub struct AttributeExtractionResult {
48 pub attributes: AttributeMap,
50 pub standard_attributes: IndexMap<QName, AttributeValue>,
52 pub extension_attributes: IndexMap<QName, AttributeValue>,
54 pub namespace_declarations: IndexMap<String, String>,
56 pub special_attributes: IndexMap<QName, SpecialAttributeValue>,
58 pub warnings: Vec<String>,
60}
61
62#[derive(Debug, Clone, PartialEq)]
64pub enum SpecialAttributeValue {
65 XsiType {
67 type_name: String,
68 namespace_uri: Option<String>,
69 resolved_type: Option<String>,
70 },
71 SchemaLocation {
73 locations: IndexMap<String, String>, },
75 NoNamespaceSchemaLocation(String),
77 Nil(bool),
79 Language {
81 language: String,
82 script: Option<String>,
83 territory: Option<String>,
84 },
85 Territory(Vec<String>),
87 Sequence(u32),
89 Flag(bool),
91}
92
93impl AttributeExtractor {
94 pub fn new() -> Self {
96 let mut extractor = Self {
97 ddex_attribute_types: HashMap::new(),
98 special_attributes: IndexMap::new(),
99 };
100
101 extractor.initialize_ddex_attributes();
102 extractor.initialize_special_handlers();
103 extractor
104 }
105
106 fn initialize_ddex_attributes(&mut self) {
108 self.ddex_attribute_types
110 .insert("LanguageAndScriptCode".to_string(), AttributeType::Language);
111 self.ddex_attribute_types
112 .insert("ApplicableTerritoryCode".to_string(), AttributeType::String);
113
114 self.ddex_attribute_types
116 .insert("IsDefault".to_string(), AttributeType::Boolean);
117 self.ddex_attribute_types
118 .insert("IsMainArtist".to_string(), AttributeType::Boolean);
119 self.ddex_attribute_types
120 .insert("HasChanged".to_string(), AttributeType::Boolean);
121
122 self.ddex_attribute_types
124 .insert("SequenceNumber".to_string(), AttributeType::Integer);
125 self.ddex_attribute_types
126 .insert("Duration".to_string(), AttributeType::String); self.ddex_attribute_types
130 .insert("Namespace".to_string(), AttributeType::Uri);
131
132 self.ddex_attribute_types
134 .insert("CreatedDateTime".to_string(), AttributeType::DateTime);
135 self.ddex_attribute_types
136 .insert("UpdatedDateTime".to_string(), AttributeType::DateTime);
137 }
138
139 fn initialize_special_handlers(&mut self) {
141 self.special_attributes
143 .insert("xsi:type".to_string(), SpecialAttributeHandler::XsiType);
144 self.special_attributes.insert(
145 "xsi:schemaLocation".to_string(),
146 SpecialAttributeHandler::XsiSchemaLocation,
147 );
148 self.special_attributes.insert(
149 "xsi:noNamespaceSchemaLocation".to_string(),
150 SpecialAttributeHandler::XsiNoNamespaceSchemaLocation,
151 );
152 self.special_attributes
153 .insert("xsi:nil".to_string(), SpecialAttributeHandler::XsiNil);
154
155 self.special_attributes.insert(
157 "xmlns".to_string(),
158 SpecialAttributeHandler::NamespaceDeclaration,
159 );
160 self.special_attributes.insert(
164 "LanguageAndScriptCode".to_string(),
165 SpecialAttributeHandler::LanguageAndTerritory,
166 );
167 self.special_attributes.insert(
168 "ApplicableTerritoryCode".to_string(),
169 SpecialAttributeHandler::LanguageAndTerritory,
170 );
171 self.special_attributes.insert(
172 "SequenceNumber".to_string(),
173 SpecialAttributeHandler::SequenceNumber,
174 );
175
176 self.special_attributes.insert(
178 "IsDefault".to_string(),
179 SpecialAttributeHandler::BooleanFlag,
180 );
181 self.special_attributes.insert(
182 "IsMainArtist".to_string(),
183 SpecialAttributeHandler::BooleanFlag,
184 );
185 }
186
187 pub fn extract_attributes(
189 &self,
190 element: &BytesStart,
191 namespace_context: &NamespaceContext,
192 ) -> Result<AttributeExtractionResult, ParseError> {
193 let mut attributes = AttributeMap::new();
194 let mut namespace_declarations = IndexMap::new();
195 let mut special_attributes = IndexMap::new();
196 let warnings = Vec::new();
197
198 debug!(
199 "Extracting attributes from element: {}",
200 String::from_utf8_lossy(element.name().as_ref())
201 );
202
203 for attr_result in element.attributes() {
205 let attr = attr_result.map_err(|e| ParseError::XmlError(format!("Failed to read attribute: {}", e)))?;
206
207 let (qname, attr_value) = self.process_attribute(&attr, namespace_context)?;
208
209 if qname.is_namespace_declaration() {
211 let prefix = if qname.local_name == "xmlns" {
212 "".to_string() } else {
214 qname.local_name.clone() };
216 namespace_declarations.insert(prefix, attr_value.to_xml_value());
217 debug!(
218 "Found namespace declaration: {}={}",
219 qname.to_xml_name(),
220 attr_value.to_xml_value()
221 );
222 }
223
224 if let Some(special_value) =
226 self.process_special_attribute(&qname, &attr_value, namespace_context)?
227 {
228 special_attributes.insert(qname.clone(), special_value);
229 }
230
231 attributes.insert(qname, attr_value);
233 }
234
235 let standard_attributes = attributes.standard_attributes();
237 let extension_attributes = attributes.extension_attributes();
238
239 debug!(
240 "Extracted {} total attributes ({} standard, {} extensions)",
241 attributes.len(),
242 standard_attributes.len(),
243 extension_attributes.len()
244 );
245
246 Ok(AttributeExtractionResult {
247 attributes,
248 standard_attributes,
249 extension_attributes,
250 namespace_declarations,
251 special_attributes,
252 warnings,
253 })
254 }
255
256 fn process_attribute(
258 &self,
259 attr: &Attribute,
260 namespace_context: &NamespaceContext,
261 ) -> Result<(QName, AttributeValue), ParseError> {
262 let attr_name = String::from_utf8_lossy(attr.key.as_ref());
263 let attr_value = String::from_utf8_lossy(&attr.value);
264
265 debug!("Processing attribute: {}={}", attr_name, attr_value);
266
267 let qname = self.resolve_attribute_qname(&attr_name, namespace_context);
269
270 let parsed_value = if let Some(attr_type) = self.get_attribute_type(&qname) {
272 AttributeValue::parse_with_type(&attr_value, attr_type).unwrap_or_else(|e| {
273 warn!(
274 "Failed to parse attribute {} as {:?}: {}",
275 qname, attr_type, e
276 );
277 AttributeValue::Raw(attr_value.to_string())
278 })
279 } else {
280 AttributeValue::String(attr_value.to_string())
282 };
283
284 Ok((qname, parsed_value))
285 }
286
287 fn resolve_attribute_qname(
289 &self,
290 attr_name: &str,
291 namespace_context: &NamespaceContext,
292 ) -> QName {
293 if let Some((prefix, local_name)) = attr_name.split_once(':') {
294 if let Some(namespace_uri) = namespace_context.current_scope.resolve_prefix(prefix) {
296 QName::with_prefix_and_namespace(local_name, prefix, namespace_uri)
297 } else {
298 warn!("Unresolved namespace prefix in attribute: {}", attr_name);
300 QName {
301 local_name: local_name.to_string(),
302 namespace_uri: None,
303 prefix: Some(prefix.to_string()),
304 }
305 }
306 } else {
307 if attr_name == "xmlns" || attr_name.starts_with("xmlns:") {
309 QName::new(attr_name)
310 } else {
311 QName::new(attr_name)
313 }
314 }
315 }
316
317 fn get_attribute_type(&self, qname: &QName) -> Option<AttributeType> {
319 if let Some(attr_type) = self.ddex_attribute_types.get(&qname.to_xml_name()) {
321 return Some(*attr_type);
322 }
323
324 self.ddex_attribute_types.get(&qname.local_name).copied()
326 }
327
328 fn process_special_attribute(
330 &self,
331 qname: &QName,
332 value: &AttributeValue,
333 namespace_context: &NamespaceContext,
334 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
335 let attr_name = qname.to_xml_name();
336
337 if let Some(handler) = self.special_attributes.get(&attr_name) {
338 match handler {
339 SpecialAttributeHandler::XsiType => self.process_xsi_type(value, namespace_context),
340 SpecialAttributeHandler::XsiSchemaLocation => self.process_schema_location(value),
341 SpecialAttributeHandler::XsiNoNamespaceSchemaLocation => Ok(Some(
342 SpecialAttributeValue::NoNamespaceSchemaLocation(value.to_xml_value()),
343 )),
344 SpecialAttributeHandler::XsiNil => self.process_xsi_nil(value),
345 SpecialAttributeHandler::NamespaceDeclaration => {
346 Ok(None)
348 }
349 SpecialAttributeHandler::LanguageAndTerritory => {
350 self.process_language_territory(value)
351 }
352 SpecialAttributeHandler::SequenceNumber => self.process_sequence_number(value),
353 SpecialAttributeHandler::BooleanFlag => self.process_boolean_flag(value),
354 }
355 } else {
356 Ok(None)
357 }
358 }
359
360 fn process_xsi_type(
362 &self,
363 value: &AttributeValue,
364 namespace_context: &NamespaceContext,
365 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
366 let type_value = value.to_xml_value();
367
368 if let Some((prefix, local_name)) = type_value.split_once(':') {
369 let namespace_uri = namespace_context.current_scope.resolve_prefix(prefix);
371 Ok(Some(SpecialAttributeValue::XsiType {
372 type_name: local_name.to_string(),
373 namespace_uri,
374 resolved_type: None, }))
376 } else {
377 Ok(Some(SpecialAttributeValue::XsiType {
379 type_name: type_value,
380 namespace_uri: None,
381 resolved_type: None,
382 }))
383 }
384 }
385
386 fn process_schema_location(
388 &self,
389 value: &AttributeValue,
390 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
391 let location_value = value.to_xml_value();
392 let mut locations = IndexMap::new();
393
394 let tokens: Vec<&str> = location_value.split_whitespace().collect();
396 for chunk in tokens.chunks(2) {
397 if chunk.len() == 2 {
398 locations.insert(chunk[0].to_string(), chunk[1].to_string());
399 }
400 }
401
402 Ok(Some(SpecialAttributeValue::SchemaLocation { locations }))
403 }
404
405 fn process_xsi_nil(
407 &self,
408 value: &AttributeValue,
409 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
410 match value {
411 AttributeValue::Boolean(b) => Ok(Some(SpecialAttributeValue::Nil(*b))),
412 _ => {
413 let str_val = value.to_xml_value();
414 let nil_val = matches!(str_val.to_lowercase().as_str(), "true" | "1");
415 Ok(Some(SpecialAttributeValue::Nil(nil_val)))
416 }
417 }
418 }
419
420 fn process_language_territory(
422 &self,
423 value: &AttributeValue,
424 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
425 let lang_value = value.to_xml_value();
426
427 if lang_value.contains('-') {
429 let parts: Vec<&str> = lang_value.split('-').collect();
430 let language = parts[0].to_string();
431 let territory = if parts.len() > 1 {
432 Some(parts[1].to_string())
433 } else {
434 None
435 };
436
437 Ok(Some(SpecialAttributeValue::Language {
438 language,
439 script: None, territory,
441 }))
442 } else if lang_value.contains(' ') {
443 let territories: Vec<String> = lang_value
445 .split_whitespace()
446 .map(|s| s.to_string())
447 .collect();
448 Ok(Some(SpecialAttributeValue::Territory(territories)))
449 } else {
450 Ok(Some(SpecialAttributeValue::Language {
451 language: lang_value,
452 script: None,
453 territory: None,
454 }))
455 }
456 }
457
458 fn process_sequence_number(
460 &self,
461 value: &AttributeValue,
462 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
463 match value {
464 AttributeValue::Integer(i) => Ok(Some(SpecialAttributeValue::Sequence(*i as u32))),
465 _ => {
466 if let Ok(seq) = value.to_xml_value().parse::<u32>() {
467 Ok(Some(SpecialAttributeValue::Sequence(seq)))
468 } else {
469 Ok(None)
470 }
471 }
472 }
473 }
474
475 fn process_boolean_flag(
477 &self,
478 value: &AttributeValue,
479 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
480 match value {
481 AttributeValue::Boolean(b) => Ok(Some(SpecialAttributeValue::Flag(*b))),
482 _ => {
483 let str_val = value.to_xml_value();
484 let bool_val = matches!(str_val.to_lowercase().as_str(), "true" | "1");
485 Ok(Some(SpecialAttributeValue::Flag(bool_val)))
486 }
487 }
488 }
489
490 pub fn apply_inheritance(
492 &self,
493 parent_attributes: &AttributeMap,
494 child_attributes: &mut AttributeMap,
495 ) {
496 let inheritance = ddex_core::models::AttributeInheritance::new();
497 inheritance.apply_inheritance(parent_attributes, child_attributes);
498 }
499
500 pub fn validate_attributes(&self, attributes: &AttributeMap) -> Vec<String> {
502 let mut errors = Vec::new();
503
504 for (qname, value) in attributes.iter() {
505 if let Err(e) = value.validate() {
506 errors.push(format!("Invalid attribute {}: {}", qname, e));
507 }
508 }
509
510 errors
511 }
512}
513
514impl Default for AttributeExtractor {
515 fn default() -> Self {
516 Self::new()
517 }
518}
519
520#[cfg(test)]
521mod tests {
522 use super::*;
523 use quick_xml::Reader;
524 use std::io::Cursor;
525
526 #[test]
527 fn test_attribute_extraction_basic() {
528 let xml = r#"<Release title="Test Album" SequenceNumber="1" IsDefault="true" />"#;
529 let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
530 let mut buf = Vec::new();
531
532 if let Ok(quick_xml::events::Event::Empty(start)) = reader.read_event_into(&mut buf) {
533 let extractor = AttributeExtractor::new();
534 let namespace_context = NamespaceContext {
535 current_scope: ddex_core::namespace::NamespaceScope::new(),
536 document_namespaces: indexmap::IndexMap::new(),
537 default_namespace: None,
538 ern_version: None,
539 };
540
541 let result = extractor
542 .extract_attributes(&start, &namespace_context)
543 .unwrap();
544
545 assert_eq!(result.attributes.len(), 3);
546 assert_eq!(
547 result.attributes.get_str("title").unwrap().to_xml_value(),
548 "Test Album"
549 );
550 assert_eq!(
551 result
552 .attributes
553 .get_str("SequenceNumber")
554 .unwrap()
555 .to_xml_value(),
556 "1"
557 );
558 assert_eq!(
559 result
560 .attributes
561 .get_str("IsDefault")
562 .unwrap()
563 .to_xml_value(),
564 "true"
565 );
566
567 if let Some(AttributeValue::Integer(seq)) = result.attributes.get_str("SequenceNumber")
569 {
570 assert_eq!(*seq, 1);
571 } else {
572 panic!("SequenceNumber should be parsed as integer");
573 }
574
575 if let Some(AttributeValue::Boolean(is_default)) =
576 result.attributes.get_str("IsDefault")
577 {
578 assert_eq!(*is_default, true);
579 } else {
580 panic!("IsDefault should be parsed as boolean");
581 }
582 }
583 }
584
585 #[test]
586 fn test_namespace_attribute_extraction() {
587 let xml = r#"<ern:Release xmlns:ern="http://ddex.net/xml/ern/43"
588 xmlns:avs="http://ddex.net/xml/avs"
589 ern:title="Test" />"#;
590 let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
591 let mut buf = Vec::new();
592
593 if let Ok(quick_xml::events::Event::Empty(start)) = reader.read_event_into(&mut buf) {
594 let extractor = AttributeExtractor::new();
595 let namespace_context = NamespaceContext {
596 current_scope: ddex_core::namespace::NamespaceScope::new(),
597 document_namespaces: indexmap::IndexMap::new(),
598 default_namespace: None,
599 ern_version: None,
600 };
601
602 let result = extractor
603 .extract_attributes(&start, &namespace_context)
604 .unwrap();
605
606 assert_eq!(result.namespace_declarations.len(), 2);
607 assert!(result.namespace_declarations.contains_key("ern"));
608 assert!(result.namespace_declarations.contains_key("avs"));
609 }
610 }
611
612 #[test]
613 fn test_special_attribute_processing() {
614 let xml = r#"<element xsi:type="xs:string"
615 xsi:nil="true"
616 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
617 xmlns:xs="http://www.w3.org/2001/XMLSchema" />"#;
618 let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
619 let mut buf = Vec::new();
620
621 if let Ok(quick_xml::events::Event::Empty(start)) = reader.read_event_into(&mut buf) {
622 let extractor = AttributeExtractor::new();
623 let namespace_context = NamespaceContext {
624 current_scope: ddex_core::namespace::NamespaceScope::new(),
625 document_namespaces: indexmap::IndexMap::new(),
626 default_namespace: None,
627 ern_version: None,
628 };
629
630 let result = extractor
631 .extract_attributes(&start, &namespace_context)
632 .unwrap();
633
634 assert!(!result.special_attributes.is_empty());
635
636 let xsi_nil_qname = QName::with_prefix_and_namespace(
638 "nil".to_string(),
639 "xsi".to_string(),
640 "http://www.w3.org/2001/XMLSchema-instance".to_string(),
641 );
642 if let Some(SpecialAttributeValue::Nil(nil_value)) =
643 result.special_attributes.get(&xsi_nil_qname)
644 {
645 assert_eq!(*nil_value, true);
646 }
647 }
648 }
649
650 #[test]
651 fn test_attribute_inheritance() {
652 let mut parent_attrs = AttributeMap::new();
653 parent_attrs.insert_str("LanguageAndScriptCode", "en-US");
654 parent_attrs.insert_str("ApplicableTerritoryCode", "Worldwide");
655
656 let mut child_attrs = AttributeMap::new();
657 child_attrs.insert_str("title", "Child Title");
658
659 let extractor = AttributeExtractor::new();
660 extractor.apply_inheritance(&parent_attrs, &mut child_attrs);
661
662 assert!(child_attrs.get_str("LanguageAndScriptCode").is_some());
664 assert!(child_attrs.get_str("ApplicableTerritoryCode").is_some());
665 assert!(child_attrs.get_str("title").is_some());
666 }
667
668 #[test]
669 fn test_ddex_standard_vs_extension_attributes() {
670 let mut attributes = AttributeMap::new();
671 attributes.insert_str("LanguageAndScriptCode", "en-US"); attributes.insert_str("custom:proprietary", "custom value"); attributes.insert_str("xmlns:custom", "http://example.com/custom"); let standard = attributes.standard_attributes();
676 let extensions = attributes.extension_attributes();
677
678 assert!(standard.len() >= 1); assert!(extensions.len() >= 1); }
681}