1use crate::error::ParseError;
8use crate::parser::namespace_detector::NamespaceContext;
9use ddex_core::models::{AttributeMap, AttributeType, AttributeValue, QName};
10use indexmap::IndexMap;
11use quick_xml::events::{attributes::Attribute, BytesStart};
12use std::collections::HashMap;
13use tracing::{debug, warn};
14
/// Extracts the attributes of an XML element into typed, namespace-aware values.
///
/// The extractor holds two lookup tables that are filled once in
/// [`AttributeExtractor::new`] and only read afterwards.
#[derive(Debug, Clone)]
pub struct AttributeExtractor {
    /// Attribute name -> value type used when parsing that attribute's text.
    /// Looked up first by full XML name, then by local name.
    ddex_attribute_types: HashMap<String, AttributeType>,
    /// Attribute XML name -> handler that derives a `SpecialAttributeValue`.
    special_attributes: IndexMap<String, SpecialAttributeHandler>,
}
23
/// Selects which routine interprets an attribute registered as "special".
#[derive(Debug, Clone)]
pub enum SpecialAttributeHandler {
    /// Registered for `xsi:type`; splits the value into prefix and type name.
    XsiType,
    /// Registered for `xsi:schemaLocation`; parses whitespace-separated pairs.
    XsiSchemaLocation,
    /// Registered for `xsi:noNamespaceSchemaLocation`; value is kept verbatim.
    XsiNoNamespaceSchemaLocation,
    /// Registered for `xsi:nil`; interpreted as a boolean.
    XsiNil,
    /// Registered for `xmlns`; produces no special value.
    NamespaceDeclaration,
    /// Registered for `LanguageAndScriptCode` and `ApplicableTerritoryCode`.
    LanguageAndTerritory,
    /// Registered for `SequenceNumber`; interpreted as an unsigned integer.
    SequenceNumber,
    /// Registered for `IsDefault` and `IsMainArtist`; interpreted as a boolean.
    BooleanFlag,
}
44
/// Everything `AttributeExtractor::extract_attributes` gathers from one element.
#[derive(Debug, Clone)]
pub struct AttributeExtractionResult {
    /// Every attribute read from the element, namespace declarations included.
    pub attributes: AttributeMap,
    /// The subset returned by `AttributeMap::standard_attributes`.
    pub standard_attributes: IndexMap<QName, AttributeValue>,
    /// The subset returned by `AttributeMap::extension_attributes`.
    pub extension_attributes: IndexMap<QName, AttributeValue>,
    /// Namespace declarations found on the element, keyed by prefix; the
    /// default namespace (`xmlns="..."`) is stored under the empty string.
    pub namespace_declarations: IndexMap<String, String>,
    /// Interpreted values for attributes that have a registered special handler.
    pub special_attributes: IndexMap<QName, SpecialAttributeValue>,
    /// Human-readable warnings.
    /// NOTE(review): the extraction code visible in this file never adds
    /// entries here; problems are reported through `tracing::warn!` instead.
    pub warnings: Vec<String>,
}
61
/// Interpreted form of an attribute that has a registered special handler.
#[derive(Debug, Clone, PartialEq)]
pub enum SpecialAttributeValue {
    /// Value of an `xsi:type` attribute.
    XsiType {
        /// Type name with any `prefix:` removed.
        type_name: String,
        /// Namespace the prefix resolved to, when a prefix was present and known.
        namespace_uri: Option<String>,
        /// Reserved for a resolved type identifier.
        resolved_type: Option<String>,
    },
    /// Value of an `xsi:schemaLocation` attribute.
    SchemaLocation {
        /// First token of each whitespace-separated pair -> second token.
        locations: IndexMap<String, String>,
    },
    /// Value of an `xsi:noNamespaceSchemaLocation` attribute, kept verbatim.
    NoNamespaceSchemaLocation(String),
    /// Value of an `xsi:nil` attribute.
    Nil(bool),
    /// A language tag split into its parts.
    Language {
        /// Primary language subtag (text before the first `-`).
        language: String,
        /// Script subtag, if any.
        script: Option<String>,
        /// Territory subtag, if any.
        territory: Option<String>,
    },
    /// Whitespace-separated list of territory codes.
    Territory(Vec<String>),
    /// Value of a sequence-number attribute.
    Sequence(u32),
    /// Value of a boolean flag attribute.
    Flag(bool),
}
92
93impl AttributeExtractor {
94 pub fn new() -> Self {
96 let mut extractor = Self {
97 ddex_attribute_types: HashMap::new(),
98 special_attributes: IndexMap::new(),
99 };
100
101 extractor.initialize_ddex_attributes();
102 extractor.initialize_special_handlers();
103 extractor
104 }
105
106 fn initialize_ddex_attributes(&mut self) {
108 self.ddex_attribute_types
110 .insert("LanguageAndScriptCode".to_string(), AttributeType::Language);
111 self.ddex_attribute_types
112 .insert("ApplicableTerritoryCode".to_string(), AttributeType::String);
113
114 self.ddex_attribute_types
116 .insert("IsDefault".to_string(), AttributeType::Boolean);
117 self.ddex_attribute_types
118 .insert("IsMainArtist".to_string(), AttributeType::Boolean);
119 self.ddex_attribute_types
120 .insert("HasChanged".to_string(), AttributeType::Boolean);
121
122 self.ddex_attribute_types
124 .insert("SequenceNumber".to_string(), AttributeType::Integer);
125 self.ddex_attribute_types
126 .insert("Duration".to_string(), AttributeType::String); self.ddex_attribute_types
130 .insert("Namespace".to_string(), AttributeType::Uri);
131
132 self.ddex_attribute_types
134 .insert("CreatedDateTime".to_string(), AttributeType::DateTime);
135 self.ddex_attribute_types
136 .insert("UpdatedDateTime".to_string(), AttributeType::DateTime);
137 }
138
139 fn initialize_special_handlers(&mut self) {
141 self.special_attributes
143 .insert("xsi:type".to_string(), SpecialAttributeHandler::XsiType);
144 self.special_attributes.insert(
145 "xsi:schemaLocation".to_string(),
146 SpecialAttributeHandler::XsiSchemaLocation,
147 );
148 self.special_attributes.insert(
149 "xsi:noNamespaceSchemaLocation".to_string(),
150 SpecialAttributeHandler::XsiNoNamespaceSchemaLocation,
151 );
152 self.special_attributes
153 .insert("xsi:nil".to_string(), SpecialAttributeHandler::XsiNil);
154
155 self.special_attributes.insert(
157 "xmlns".to_string(),
158 SpecialAttributeHandler::NamespaceDeclaration,
159 );
160 self.special_attributes.insert(
164 "LanguageAndScriptCode".to_string(),
165 SpecialAttributeHandler::LanguageAndTerritory,
166 );
167 self.special_attributes.insert(
168 "ApplicableTerritoryCode".to_string(),
169 SpecialAttributeHandler::LanguageAndTerritory,
170 );
171 self.special_attributes.insert(
172 "SequenceNumber".to_string(),
173 SpecialAttributeHandler::SequenceNumber,
174 );
175
176 self.special_attributes.insert(
178 "IsDefault".to_string(),
179 SpecialAttributeHandler::BooleanFlag,
180 );
181 self.special_attributes.insert(
182 "IsMainArtist".to_string(),
183 SpecialAttributeHandler::BooleanFlag,
184 );
185 }
186
187 pub fn extract_attributes(
189 &self,
190 element: &BytesStart,
191 namespace_context: &NamespaceContext,
192 ) -> Result<AttributeExtractionResult, ParseError> {
193 let mut attributes = AttributeMap::new();
194 let mut namespace_declarations = IndexMap::new();
195 let mut special_attributes = IndexMap::new();
196 let warnings = Vec::new();
197
198 debug!(
199 "Extracting attributes from element: {}",
200 String::from_utf8_lossy(element.name().as_ref())
201 );
202
203 for attr_result in element.attributes() {
205 let attr = attr_result.map_err(|e| ParseError::XmlError {
206 message: format!("Failed to read attribute: {}", e),
207 location: crate::error::ErrorLocation::default(),
208 })?;
209
210 let (qname, attr_value) = self.process_attribute(&attr, namespace_context)?;
211
212 if qname.is_namespace_declaration() {
214 let prefix = if qname.local_name == "xmlns" {
215 "".to_string() } else {
217 qname.local_name.clone() };
219 namespace_declarations.insert(prefix, attr_value.to_xml_value());
220 debug!(
221 "Found namespace declaration: {}={}",
222 qname.to_xml_name(),
223 attr_value.to_xml_value()
224 );
225 }
226
227 if let Some(special_value) =
229 self.process_special_attribute(&qname, &attr_value, namespace_context)?
230 {
231 special_attributes.insert(qname.clone(), special_value);
232 }
233
234 attributes.insert(qname, attr_value);
236 }
237
238 let standard_attributes = attributes.standard_attributes();
240 let extension_attributes = attributes.extension_attributes();
241
242 debug!(
243 "Extracted {} total attributes ({} standard, {} extensions)",
244 attributes.len(),
245 standard_attributes.len(),
246 extension_attributes.len()
247 );
248
249 Ok(AttributeExtractionResult {
250 attributes,
251 standard_attributes,
252 extension_attributes,
253 namespace_declarations,
254 special_attributes,
255 warnings,
256 })
257 }
258
259 fn process_attribute(
261 &self,
262 attr: &Attribute,
263 namespace_context: &NamespaceContext,
264 ) -> Result<(QName, AttributeValue), ParseError> {
265 let attr_name = String::from_utf8_lossy(attr.key.as_ref());
266 let attr_value = String::from_utf8_lossy(&attr.value);
267
268 debug!("Processing attribute: {}={}", attr_name, attr_value);
269
270 let qname = self.resolve_attribute_qname(&attr_name, namespace_context);
272
273 let parsed_value = if let Some(attr_type) = self.get_attribute_type(&qname) {
275 AttributeValue::parse_with_type(&attr_value, attr_type).unwrap_or_else(|e| {
276 warn!(
277 "Failed to parse attribute {} as {:?}: {}",
278 qname, attr_type, e
279 );
280 AttributeValue::Raw(attr_value.to_string())
281 })
282 } else {
283 AttributeValue::String(attr_value.to_string())
285 };
286
287 Ok((qname, parsed_value))
288 }
289
290 fn resolve_attribute_qname(
292 &self,
293 attr_name: &str,
294 namespace_context: &NamespaceContext,
295 ) -> QName {
296 if let Some((prefix, local_name)) = attr_name.split_once(':') {
297 if let Some(namespace_uri) = namespace_context.current_scope.resolve_prefix(prefix) {
299 QName::with_prefix_and_namespace(local_name, prefix, namespace_uri)
300 } else {
301 warn!("Unresolved namespace prefix in attribute: {}", attr_name);
303 QName {
304 local_name: local_name.to_string(),
305 namespace_uri: None,
306 prefix: Some(prefix.to_string()),
307 }
308 }
309 } else {
310 if attr_name == "xmlns" || attr_name.starts_with("xmlns:") {
312 QName::new(attr_name)
313 } else {
314 QName::new(attr_name)
316 }
317 }
318 }
319
320 fn get_attribute_type(&self, qname: &QName) -> Option<AttributeType> {
322 if let Some(attr_type) = self.ddex_attribute_types.get(&qname.to_xml_name()) {
324 return Some(*attr_type);
325 }
326
327 self.ddex_attribute_types.get(&qname.local_name).copied()
329 }
330
331 fn process_special_attribute(
333 &self,
334 qname: &QName,
335 value: &AttributeValue,
336 namespace_context: &NamespaceContext,
337 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
338 let attr_name = qname.to_xml_name();
339
340 if let Some(handler) = self.special_attributes.get(&attr_name) {
341 match handler {
342 SpecialAttributeHandler::XsiType => self.process_xsi_type(value, namespace_context),
343 SpecialAttributeHandler::XsiSchemaLocation => self.process_schema_location(value),
344 SpecialAttributeHandler::XsiNoNamespaceSchemaLocation => Ok(Some(
345 SpecialAttributeValue::NoNamespaceSchemaLocation(value.to_xml_value()),
346 )),
347 SpecialAttributeHandler::XsiNil => self.process_xsi_nil(value),
348 SpecialAttributeHandler::NamespaceDeclaration => {
349 Ok(None)
351 }
352 SpecialAttributeHandler::LanguageAndTerritory => {
353 self.process_language_territory(value)
354 }
355 SpecialAttributeHandler::SequenceNumber => self.process_sequence_number(value),
356 SpecialAttributeHandler::BooleanFlag => self.process_boolean_flag(value),
357 }
358 } else {
359 Ok(None)
360 }
361 }
362
363 fn process_xsi_type(
365 &self,
366 value: &AttributeValue,
367 namespace_context: &NamespaceContext,
368 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
369 let type_value = value.to_xml_value();
370
371 if let Some((prefix, local_name)) = type_value.split_once(':') {
372 let namespace_uri = namespace_context.current_scope.resolve_prefix(prefix);
374 Ok(Some(SpecialAttributeValue::XsiType {
375 type_name: local_name.to_string(),
376 namespace_uri,
377 resolved_type: None, }))
379 } else {
380 Ok(Some(SpecialAttributeValue::XsiType {
382 type_name: type_value,
383 namespace_uri: None,
384 resolved_type: None,
385 }))
386 }
387 }
388
389 fn process_schema_location(
391 &self,
392 value: &AttributeValue,
393 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
394 let location_value = value.to_xml_value();
395 let mut locations = IndexMap::new();
396
397 let tokens: Vec<&str> = location_value.split_whitespace().collect();
399 for chunk in tokens.chunks(2) {
400 if chunk.len() == 2 {
401 locations.insert(chunk[0].to_string(), chunk[1].to_string());
402 }
403 }
404
405 Ok(Some(SpecialAttributeValue::SchemaLocation { locations }))
406 }
407
408 fn process_xsi_nil(
410 &self,
411 value: &AttributeValue,
412 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
413 match value {
414 AttributeValue::Boolean(b) => Ok(Some(SpecialAttributeValue::Nil(*b))),
415 _ => {
416 let str_val = value.to_xml_value();
417 let nil_val = matches!(str_val.to_lowercase().as_str(), "true" | "1");
418 Ok(Some(SpecialAttributeValue::Nil(nil_val)))
419 }
420 }
421 }
422
423 fn process_language_territory(
425 &self,
426 value: &AttributeValue,
427 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
428 let lang_value = value.to_xml_value();
429
430 if lang_value.contains('-') {
432 let parts: Vec<&str> = lang_value.split('-').collect();
433 let language = parts[0].to_string();
434 let territory = if parts.len() > 1 {
435 Some(parts[1].to_string())
436 } else {
437 None
438 };
439
440 Ok(Some(SpecialAttributeValue::Language {
441 language,
442 script: None, territory,
444 }))
445 } else if lang_value.contains(' ') {
446 let territories: Vec<String> = lang_value
448 .split_whitespace()
449 .map(|s| s.to_string())
450 .collect();
451 Ok(Some(SpecialAttributeValue::Territory(territories)))
452 } else {
453 Ok(Some(SpecialAttributeValue::Language {
454 language: lang_value,
455 script: None,
456 territory: None,
457 }))
458 }
459 }
460
461 fn process_sequence_number(
463 &self,
464 value: &AttributeValue,
465 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
466 match value {
467 AttributeValue::Integer(i) => Ok(Some(SpecialAttributeValue::Sequence(*i as u32))),
468 _ => {
469 if let Ok(seq) = value.to_xml_value().parse::<u32>() {
470 Ok(Some(SpecialAttributeValue::Sequence(seq)))
471 } else {
472 Ok(None)
473 }
474 }
475 }
476 }
477
478 fn process_boolean_flag(
480 &self,
481 value: &AttributeValue,
482 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
483 match value {
484 AttributeValue::Boolean(b) => Ok(Some(SpecialAttributeValue::Flag(*b))),
485 _ => {
486 let str_val = value.to_xml_value();
487 let bool_val = matches!(str_val.to_lowercase().as_str(), "true" | "1");
488 Ok(Some(SpecialAttributeValue::Flag(bool_val)))
489 }
490 }
491 }
492
493 pub fn apply_inheritance(
495 &self,
496 parent_attributes: &AttributeMap,
497 child_attributes: &mut AttributeMap,
498 ) {
499 let inheritance = ddex_core::models::AttributeInheritance::new();
500 inheritance.apply_inheritance(parent_attributes, child_attributes);
501 }
502
503 pub fn validate_attributes(&self, attributes: &AttributeMap) -> Vec<String> {
505 let mut errors = Vec::new();
506
507 for (qname, value) in attributes.iter() {
508 if let Err(e) = value.validate() {
509 errors.push(format!("Invalid attribute {}: {}", qname, e));
510 }
511 }
512
513 errors
514 }
515}
516
517impl Default for AttributeExtractor {
518 fn default() -> Self {
519 Self::new()
520 }
521}
522
523#[cfg(test)]
524mod tests {
525 use super::*;
526 use quick_xml::Reader;
527 use std::io::Cursor;
528
529 #[test]
530 fn test_attribute_extraction_basic() {
531 let xml = r#"<Release title="Test Album" SequenceNumber="1" IsDefault="true" />"#;
532 let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
533 let mut buf = Vec::new();
534
535 if let Ok(quick_xml::events::Event::Empty(start)) = reader.read_event_into(&mut buf) {
536 let extractor = AttributeExtractor::new();
537 let namespace_context = NamespaceContext {
538 current_scope: ddex_core::namespace::NamespaceScope::new(),
539 document_namespaces: indexmap::IndexMap::new(),
540 default_namespace: None,
541 ern_version: None,
542 };
543
544 let result = extractor
545 .extract_attributes(&start, &namespace_context)
546 .unwrap();
547
548 assert_eq!(result.attributes.len(), 3);
549 assert_eq!(
550 result.attributes.get_str("title").unwrap().to_xml_value(),
551 "Test Album"
552 );
553 assert_eq!(
554 result
555 .attributes
556 .get_str("SequenceNumber")
557 .unwrap()
558 .to_xml_value(),
559 "1"
560 );
561 assert_eq!(
562 result
563 .attributes
564 .get_str("IsDefault")
565 .unwrap()
566 .to_xml_value(),
567 "true"
568 );
569
570 if let Some(AttributeValue::Integer(seq)) = result.attributes.get_str("SequenceNumber")
572 {
573 assert_eq!(*seq, 1);
574 } else {
575 panic!("SequenceNumber should be parsed as integer");
576 }
577
578 if let Some(AttributeValue::Boolean(is_default)) =
579 result.attributes.get_str("IsDefault")
580 {
581 assert_eq!(*is_default, true);
582 } else {
583 panic!("IsDefault should be parsed as boolean");
584 }
585 }
586 }
587
588 #[test]
589 fn test_namespace_attribute_extraction() {
590 let xml = r#"<ern:Release xmlns:ern="http://ddex.net/xml/ern/43"
591 xmlns:avs="http://ddex.net/xml/avs"
592 ern:title="Test" />"#;
593 let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
594 let mut buf = Vec::new();
595
596 if let Ok(quick_xml::events::Event::Empty(start)) = reader.read_event_into(&mut buf) {
597 let extractor = AttributeExtractor::new();
598 let namespace_context = NamespaceContext {
599 current_scope: ddex_core::namespace::NamespaceScope::new(),
600 document_namespaces: indexmap::IndexMap::new(),
601 default_namespace: None,
602 ern_version: None,
603 };
604
605 let result = extractor
606 .extract_attributes(&start, &namespace_context)
607 .unwrap();
608
609 assert_eq!(result.namespace_declarations.len(), 2);
610 assert!(result.namespace_declarations.contains_key("ern"));
611 assert!(result.namespace_declarations.contains_key("avs"));
612 }
613 }
614
615 #[test]
616 fn test_special_attribute_processing() {
617 let xml = r#"<element xsi:type="xs:string"
618 xsi:nil="true"
619 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
620 xmlns:xs="http://www.w3.org/2001/XMLSchema" />"#;
621 let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
622 let mut buf = Vec::new();
623
624 if let Ok(quick_xml::events::Event::Empty(start)) = reader.read_event_into(&mut buf) {
625 let extractor = AttributeExtractor::new();
626 let namespace_context = NamespaceContext {
627 current_scope: ddex_core::namespace::NamespaceScope::new(),
628 document_namespaces: indexmap::IndexMap::new(),
629 default_namespace: None,
630 ern_version: None,
631 };
632
633 let result = extractor
634 .extract_attributes(&start, &namespace_context)
635 .unwrap();
636
637 assert!(!result.special_attributes.is_empty());
638
639 let xsi_nil_qname = QName::with_prefix_and_namespace(
641 "nil".to_string(),
642 "xsi".to_string(),
643 "http://www.w3.org/2001/XMLSchema-instance".to_string(),
644 );
645 if let Some(SpecialAttributeValue::Nil(nil_value)) =
646 result.special_attributes.get(&xsi_nil_qname)
647 {
648 assert_eq!(*nil_value, true);
649 }
650 }
651 }
652
653 #[test]
654 fn test_attribute_inheritance() {
655 let mut parent_attrs = AttributeMap::new();
656 parent_attrs.insert_str("LanguageAndScriptCode", "en-US");
657 parent_attrs.insert_str("ApplicableTerritoryCode", "Worldwide");
658
659 let mut child_attrs = AttributeMap::new();
660 child_attrs.insert_str("title", "Child Title");
661
662 let extractor = AttributeExtractor::new();
663 extractor.apply_inheritance(&parent_attrs, &mut child_attrs);
664
665 assert!(child_attrs.get_str("LanguageAndScriptCode").is_some());
667 assert!(child_attrs.get_str("ApplicableTerritoryCode").is_some());
668 assert!(child_attrs.get_str("title").is_some());
669 }
670
671 #[test]
672 fn test_ddex_standard_vs_extension_attributes() {
673 let mut attributes = AttributeMap::new();
674 attributes.insert_str("LanguageAndScriptCode", "en-US"); attributes.insert_str("custom:proprietary", "custom value"); attributes.insert_str("xmlns:custom", "http://example.com/custom"); let standard = attributes.standard_attributes();
679 let extensions = attributes.extension_attributes();
680
681 assert!(standard.len() >= 1); assert!(extensions.len() >= 1); }
684}