1use ddex_core::models::{AttributeMap, AttributeValue, QName, AttributeType};
8use crate::parser::namespace_detector::NamespaceContext;
9use crate::error::ParseError;
10use indexmap::IndexMap;
11use quick_xml::events::{BytesStart, attributes::Attribute};
12use std::collections::HashMap;
13use tracing::{debug, warn};
14
15#[derive(Debug, Clone)]
17pub struct AttributeExtractor {
18 ddex_attribute_types: HashMap<String, AttributeType>,
20 special_attributes: IndexMap<String, SpecialAttributeHandler>,
22}
23
/// Selects how a "special" attribute is turned into a structured value.
///
/// Handlers are registered per attribute name by the extractor and dispatched
/// when that attribute is encountered.
#[derive(Debug, Clone)]
pub enum SpecialAttributeHandler {
    /// `xsi:type`: splits an optional prefix off the type name.
    XsiType,

    /// `xsi:schemaLocation`: whitespace-separated namespace/location pairs.
    XsiSchemaLocation,

    /// `xsi:noNamespaceSchemaLocation`: the value is kept verbatim.
    XsiNoNamespaceSchemaLocation,

    /// `xsi:nil`: interpreted as a boolean.
    XsiNil,

    /// `xmlns`: produces no special value.
    NamespaceDeclaration,

    /// `LanguageAndScriptCode` / `ApplicableTerritoryCode`.
    LanguageAndTerritory,

    /// `SequenceNumber`: interpreted as an unsigned integer.
    SequenceNumber,

    /// `IsDefault` / `IsMainArtist`: interpreted as a boolean flag.
    BooleanFlag,
}
44
45#[derive(Debug, Clone)]
47pub struct AttributeExtractionResult {
48 pub attributes: AttributeMap,
50 pub standard_attributes: IndexMap<QName, AttributeValue>,
52 pub extension_attributes: IndexMap<QName, AttributeValue>,
54 pub namespace_declarations: IndexMap<String, String>,
56 pub special_attributes: IndexMap<QName, SpecialAttributeValue>,
58 pub warnings: Vec<String>,
60}
61
62#[derive(Debug, Clone, PartialEq)]
64pub enum SpecialAttributeValue {
65 XsiType {
67 type_name: String,
68 namespace_uri: Option<String>,
69 resolved_type: Option<String>,
70 },
71 SchemaLocation {
73 locations: IndexMap<String, String>, },
75 NoNamespaceSchemaLocation(String),
77 Nil(bool),
79 Language {
81 language: String,
82 script: Option<String>,
83 territory: Option<String>,
84 },
85 Territory(Vec<String>),
87 Sequence(u32),
89 Flag(bool),
91}
92
93impl AttributeExtractor {
94 pub fn new() -> Self {
96 let mut extractor = Self {
97 ddex_attribute_types: HashMap::new(),
98 special_attributes: IndexMap::new(),
99 };
100
101 extractor.initialize_ddex_attributes();
102 extractor.initialize_special_handlers();
103 extractor
104 }
105
106 fn initialize_ddex_attributes(&mut self) {
108 self.ddex_attribute_types.insert("LanguageAndScriptCode".to_string(), AttributeType::Language);
110 self.ddex_attribute_types.insert("ApplicableTerritoryCode".to_string(), AttributeType::String);
111
112 self.ddex_attribute_types.insert("IsDefault".to_string(), AttributeType::Boolean);
114 self.ddex_attribute_types.insert("IsMainArtist".to_string(), AttributeType::Boolean);
115 self.ddex_attribute_types.insert("HasChanged".to_string(), AttributeType::Boolean);
116
117 self.ddex_attribute_types.insert("SequenceNumber".to_string(), AttributeType::Integer);
119 self.ddex_attribute_types.insert("Duration".to_string(), AttributeType::String); self.ddex_attribute_types.insert("Namespace".to_string(), AttributeType::Uri);
123
124 self.ddex_attribute_types.insert("CreatedDateTime".to_string(), AttributeType::DateTime);
126 self.ddex_attribute_types.insert("UpdatedDateTime".to_string(), AttributeType::DateTime);
127 }
128
129 fn initialize_special_handlers(&mut self) {
131 self.special_attributes.insert("xsi:type".to_string(), SpecialAttributeHandler::XsiType);
133 self.special_attributes.insert("xsi:schemaLocation".to_string(), SpecialAttributeHandler::XsiSchemaLocation);
134 self.special_attributes.insert("xsi:noNamespaceSchemaLocation".to_string(), SpecialAttributeHandler::XsiNoNamespaceSchemaLocation);
135 self.special_attributes.insert("xsi:nil".to_string(), SpecialAttributeHandler::XsiNil);
136
137 self.special_attributes.insert("xmlns".to_string(), SpecialAttributeHandler::NamespaceDeclaration);
139 self.special_attributes.insert("LanguageAndScriptCode".to_string(), SpecialAttributeHandler::LanguageAndTerritory);
143 self.special_attributes.insert("ApplicableTerritoryCode".to_string(), SpecialAttributeHandler::LanguageAndTerritory);
144 self.special_attributes.insert("SequenceNumber".to_string(), SpecialAttributeHandler::SequenceNumber);
145
146 self.special_attributes.insert("IsDefault".to_string(), SpecialAttributeHandler::BooleanFlag);
148 self.special_attributes.insert("IsMainArtist".to_string(), SpecialAttributeHandler::BooleanFlag);
149 }
150
151 pub fn extract_attributes(
153 &self,
154 element: &BytesStart,
155 namespace_context: &NamespaceContext,
156 ) -> Result<AttributeExtractionResult, ParseError> {
157 let mut attributes = AttributeMap::new();
158 let mut namespace_declarations = IndexMap::new();
159 let mut special_attributes = IndexMap::new();
160 let warnings = Vec::new();
161
162 debug!("Extracting attributes from element: {}", String::from_utf8_lossy(element.name().as_ref()));
163
164 for attr_result in element.attributes() {
166 let attr = attr_result.map_err(|e| ParseError::XmlError {
167 message: format!("Failed to read attribute: {}", e),
168 location: crate::error::ErrorLocation::default(),
169 })?;
170
171 let (qname, attr_value) = self.process_attribute(&attr, namespace_context)?;
172
173 if qname.is_namespace_declaration() {
175 let prefix = if qname.local_name == "xmlns" {
176 "".to_string() } else {
178 qname.local_name.clone() };
180 namespace_declarations.insert(prefix, attr_value.to_xml_value());
181 debug!("Found namespace declaration: {}={}", qname.to_xml_name(), attr_value.to_xml_value());
182 }
183
184 if let Some(special_value) = self.process_special_attribute(&qname, &attr_value, namespace_context)? {
186 special_attributes.insert(qname.clone(), special_value);
187 }
188
189 attributes.insert(qname, attr_value);
191 }
192
193 let standard_attributes = attributes.standard_attributes();
195 let extension_attributes = attributes.extension_attributes();
196
197 debug!("Extracted {} total attributes ({} standard, {} extensions)",
198 attributes.len(), standard_attributes.len(), extension_attributes.len());
199
200 Ok(AttributeExtractionResult {
201 attributes,
202 standard_attributes,
203 extension_attributes,
204 namespace_declarations,
205 special_attributes,
206 warnings,
207 })
208 }
209
210 fn process_attribute(
212 &self,
213 attr: &Attribute,
214 namespace_context: &NamespaceContext,
215 ) -> Result<(QName, AttributeValue), ParseError> {
216 let attr_name = String::from_utf8_lossy(attr.key.as_ref());
217 let attr_value = String::from_utf8_lossy(&attr.value);
218
219 debug!("Processing attribute: {}={}", attr_name, attr_value);
220
221 let qname = self.resolve_attribute_qname(&attr_name, namespace_context);
223
224 let parsed_value = if let Some(attr_type) = self.get_attribute_type(&qname) {
226 AttributeValue::parse_with_type(&attr_value, attr_type)
227 .unwrap_or_else(|e| {
228 warn!("Failed to parse attribute {} as {:?}: {}", qname, attr_type, e);
229 AttributeValue::Raw(attr_value.to_string())
230 })
231 } else {
232 AttributeValue::String(attr_value.to_string())
234 };
235
236 Ok((qname, parsed_value))
237 }
238
239 fn resolve_attribute_qname(&self, attr_name: &str, namespace_context: &NamespaceContext) -> QName {
241 if let Some((prefix, local_name)) = attr_name.split_once(':') {
242 if let Some(namespace_uri) = namespace_context.current_scope.resolve_prefix(prefix) {
244 QName::with_prefix_and_namespace(local_name, prefix, namespace_uri)
245 } else {
246 warn!("Unresolved namespace prefix in attribute: {}", attr_name);
248 QName {
249 local_name: local_name.to_string(),
250 namespace_uri: None,
251 prefix: Some(prefix.to_string()),
252 }
253 }
254 } else {
255 if attr_name == "xmlns" || attr_name.starts_with("xmlns:") {
257 QName::new(attr_name)
258 } else {
259 QName::new(attr_name)
261 }
262 }
263 }
264
265 fn get_attribute_type(&self, qname: &QName) -> Option<AttributeType> {
267 if let Some(attr_type) = self.ddex_attribute_types.get(&qname.to_xml_name()) {
269 return Some(*attr_type);
270 }
271
272 self.ddex_attribute_types.get(&qname.local_name).copied()
274 }
275
276 fn process_special_attribute(
278 &self,
279 qname: &QName,
280 value: &AttributeValue,
281 namespace_context: &NamespaceContext,
282 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
283 let attr_name = qname.to_xml_name();
284
285 if let Some(handler) = self.special_attributes.get(&attr_name) {
286 match handler {
287 SpecialAttributeHandler::XsiType => {
288 self.process_xsi_type(value, namespace_context)
289 },
290 SpecialAttributeHandler::XsiSchemaLocation => {
291 self.process_schema_location(value)
292 },
293 SpecialAttributeHandler::XsiNoNamespaceSchemaLocation => {
294 Ok(Some(SpecialAttributeValue::NoNamespaceSchemaLocation(value.to_xml_value())))
295 },
296 SpecialAttributeHandler::XsiNil => {
297 self.process_xsi_nil(value)
298 },
299 SpecialAttributeHandler::NamespaceDeclaration => {
300 Ok(None)
302 },
303 SpecialAttributeHandler::LanguageAndTerritory => {
304 self.process_language_territory(value)
305 },
306 SpecialAttributeHandler::SequenceNumber => {
307 self.process_sequence_number(value)
308 },
309 SpecialAttributeHandler::BooleanFlag => {
310 self.process_boolean_flag(value)
311 },
312 }
313 } else {
314 Ok(None)
315 }
316 }
317
318 fn process_xsi_type(
320 &self,
321 value: &AttributeValue,
322 namespace_context: &NamespaceContext,
323 ) -> Result<Option<SpecialAttributeValue>, ParseError> {
324 let type_value = value.to_xml_value();
325
326 if let Some((prefix, local_name)) = type_value.split_once(':') {
327 let namespace_uri = namespace_context.current_scope.resolve_prefix(prefix);
329 Ok(Some(SpecialAttributeValue::XsiType {
330 type_name: local_name.to_string(),
331 namespace_uri,
332 resolved_type: None, }))
334 } else {
335 Ok(Some(SpecialAttributeValue::XsiType {
337 type_name: type_value,
338 namespace_uri: None,
339 resolved_type: None,
340 }))
341 }
342 }
343
344 fn process_schema_location(&self, value: &AttributeValue) -> Result<Option<SpecialAttributeValue>, ParseError> {
346 let location_value = value.to_xml_value();
347 let mut locations = IndexMap::new();
348
349 let tokens: Vec<&str> = location_value.split_whitespace().collect();
351 for chunk in tokens.chunks(2) {
352 if chunk.len() == 2 {
353 locations.insert(chunk[0].to_string(), chunk[1].to_string());
354 }
355 }
356
357 Ok(Some(SpecialAttributeValue::SchemaLocation { locations }))
358 }
359
360 fn process_xsi_nil(&self, value: &AttributeValue) -> Result<Option<SpecialAttributeValue>, ParseError> {
362 match value {
363 AttributeValue::Boolean(b) => Ok(Some(SpecialAttributeValue::Nil(*b))),
364 _ => {
365 let str_val = value.to_xml_value();
366 let nil_val = matches!(str_val.to_lowercase().as_str(), "true" | "1");
367 Ok(Some(SpecialAttributeValue::Nil(nil_val)))
368 }
369 }
370 }
371
372 fn process_language_territory(&self, value: &AttributeValue) -> Result<Option<SpecialAttributeValue>, ParseError> {
374 let lang_value = value.to_xml_value();
375
376 if lang_value.contains('-') {
378 let parts: Vec<&str> = lang_value.split('-').collect();
379 let language = parts[0].to_string();
380 let territory = if parts.len() > 1 {
381 Some(parts[1].to_string())
382 } else {
383 None
384 };
385
386 Ok(Some(SpecialAttributeValue::Language {
387 language,
388 script: None, territory,
390 }))
391 } else if lang_value.contains(' ') {
392 let territories: Vec<String> = lang_value.split_whitespace()
394 .map(|s| s.to_string())
395 .collect();
396 Ok(Some(SpecialAttributeValue::Territory(territories)))
397 } else {
398 Ok(Some(SpecialAttributeValue::Language {
399 language: lang_value,
400 script: None,
401 territory: None,
402 }))
403 }
404 }
405
406 fn process_sequence_number(&self, value: &AttributeValue) -> Result<Option<SpecialAttributeValue>, ParseError> {
408 match value {
409 AttributeValue::Integer(i) => Ok(Some(SpecialAttributeValue::Sequence(*i as u32))),
410 _ => {
411 if let Ok(seq) = value.to_xml_value().parse::<u32>() {
412 Ok(Some(SpecialAttributeValue::Sequence(seq)))
413 } else {
414 Ok(None)
415 }
416 }
417 }
418 }
419
420 fn process_boolean_flag(&self, value: &AttributeValue) -> Result<Option<SpecialAttributeValue>, ParseError> {
422 match value {
423 AttributeValue::Boolean(b) => Ok(Some(SpecialAttributeValue::Flag(*b))),
424 _ => {
425 let str_val = value.to_xml_value();
426 let bool_val = matches!(str_val.to_lowercase().as_str(), "true" | "1");
427 Ok(Some(SpecialAttributeValue::Flag(bool_val)))
428 }
429 }
430 }
431
432 pub fn apply_inheritance(
434 &self,
435 parent_attributes: &AttributeMap,
436 child_attributes: &mut AttributeMap,
437 ) {
438 let inheritance = ddex_core::models::AttributeInheritance::new();
439 inheritance.apply_inheritance(parent_attributes, child_attributes);
440 }
441
442 pub fn validate_attributes(&self, attributes: &AttributeMap) -> Vec<String> {
444 let mut errors = Vec::new();
445
446 for (qname, value) in attributes.iter() {
447 if let Err(e) = value.validate() {
448 errors.push(format!("Invalid attribute {}: {}", qname, e));
449 }
450 }
451
452 errors
453 }
454}
455
456impl Default for AttributeExtractor {
457 fn default() -> Self {
458 Self::new()
459 }
460}
461
462#[cfg(test)]
463mod tests {
464 use super::*;
465 use quick_xml::Reader;
466 use std::io::Cursor;
467
// Plain (unprefixed) attributes are extracted and parsed with their registered
// types: `SequenceNumber` as an integer and `IsDefault` as a boolean.
#[test]
fn test_attribute_extraction_basic() {
    let xml = r#"<Release title="Test Album" SequenceNumber="1" IsDefault="true" />"#;
    let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
    let mut buf = Vec::new();

    // Fail loudly if the fixture does not parse as an empty element; otherwise
    // the test would pass without running a single assertion.
    let start = match reader.read_event_into(&mut buf) {
        Ok(quick_xml::events::Event::Empty(start)) => start,
        other => panic!("expected an empty element event, got {:?}", other),
    };

    let extractor = AttributeExtractor::new();
    let namespace_context = NamespaceContext {
        current_scope: ddex_core::namespace::NamespaceScope::new(),
        document_namespaces: indexmap::IndexMap::new(),
        default_namespace: None,
        ern_version: None,
    };

    let result = extractor.extract_attributes(&start, &namespace_context).unwrap();

    assert_eq!(result.attributes.len(), 3);
    assert_eq!(result.attributes.get_str("title").unwrap().to_xml_value(), "Test Album");
    assert_eq!(result.attributes.get_str("SequenceNumber").unwrap().to_xml_value(), "1");
    assert_eq!(result.attributes.get_str("IsDefault").unwrap().to_xml_value(), "true");

    match result.attributes.get_str("SequenceNumber") {
        Some(AttributeValue::Integer(seq)) => assert_eq!(*seq, 1),
        _ => panic!("SequenceNumber should be parsed as integer"),
    }

    match result.attributes.get_str("IsDefault") {
        Some(AttributeValue::Boolean(is_default)) => assert!(*is_default),
        _ => panic!("IsDefault should be parsed as boolean"),
    }
}
504
// `xmlns:prefix` declarations are collected by prefix.
#[test]
fn test_namespace_attribute_extraction() {
    let xml = r#"<ern:Release xmlns:ern="http://ddex.net/xml/ern/43"
                  xmlns:avs="http://ddex.net/xml/avs"
                  ern:title="Test" />"#;
    let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
    let mut buf = Vec::new();

    // Fail loudly if the fixture does not parse as an empty element; otherwise
    // the test would pass without running a single assertion.
    let start = match reader.read_event_into(&mut buf) {
        Ok(quick_xml::events::Event::Empty(start)) => start,
        other => panic!("expected an empty element event, got {:?}", other),
    };

    let extractor = AttributeExtractor::new();
    let namespace_context = NamespaceContext {
        current_scope: ddex_core::namespace::NamespaceScope::new(),
        document_namespaces: indexmap::IndexMap::new(),
        default_namespace: None,
        ern_version: None,
    };

    let result = extractor.extract_attributes(&start, &namespace_context).unwrap();

    assert_eq!(result.namespace_declarations.len(), 2);
    assert!(result.namespace_declarations.contains_key("ern"));
    assert!(result.namespace_declarations.contains_key("avs"));
}
529
// `xsi:*` attributes produce structured special values.
#[test]
fn test_special_attribute_processing() {
    let xml = r#"<element xsi:type="xs:string"
                  xsi:nil="true"
                  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                  xmlns:xs="http://www.w3.org/2001/XMLSchema" />"#;
    let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
    let mut buf = Vec::new();

    // Fail loudly if the fixture does not parse as an empty element; otherwise
    // the test would pass without running a single assertion.
    let start = match reader.read_event_into(&mut buf) {
        Ok(quick_xml::events::Event::Empty(start)) => start,
        other => panic!("expected an empty element event, got {:?}", other),
    };

    let extractor = AttributeExtractor::new();
    let namespace_context = NamespaceContext {
        current_scope: ddex_core::namespace::NamespaceScope::new(),
        document_namespaces: indexmap::IndexMap::new(),
        default_namespace: None,
        ern_version: None,
    };

    let result = extractor.extract_attributes(&start, &namespace_context).unwrap();

    assert!(!result.special_attributes.is_empty());

    // NOTE(review): this lookup only succeeds if the `xsi` prefix resolves in a
    // freshly created scope, so the check stays conditional — confirm against
    // `NamespaceScope::new()` and tighten it if the prefix is always bound.
    let xsi_nil_qname = QName::with_prefix_and_namespace(
        "nil".to_string(),
        "xsi".to_string(),
        "http://www.w3.org/2001/XMLSchema-instance".to_string(),
    );
    if let Some(SpecialAttributeValue::Nil(nil_value)) = result.special_attributes.get(&xsi_nil_qname) {
        assert!(*nil_value);
    }
}
563
// After inheritance the child carries the parent's language and territory
// attributes and still has its own attribute.
#[test]
fn test_attribute_inheritance() {
    let extractor = AttributeExtractor::new();

    let mut parent = AttributeMap::new();
    parent.insert_str("LanguageAndScriptCode", "en-US");
    parent.insert_str("ApplicableTerritoryCode", "Worldwide");

    let mut child = AttributeMap::new();
    child.insert_str("title", "Child Title");

    extractor.apply_inheritance(&parent, &mut child);

    for expected in ["LanguageAndScriptCode", "ApplicableTerritoryCode", "title"].iter() {
        assert!(child.get_str(expected).is_some());
    }
}
581
// A map holding a DDEX attribute, a prefixed custom attribute and a namespace
// declaration reports at least one standard and one extension attribute.
#[test]
fn test_ddex_standard_vs_extension_attributes() {
    let mut attributes = AttributeMap::new();
    attributes.insert_str("LanguageAndScriptCode", "en-US");
    attributes.insert_str("custom:proprietary", "custom value");
    attributes.insert_str("xmlns:custom", "http://example.com/custom");

    let standard = attributes.standard_attributes();
    let extensions = attributes.extension_attributes();

    assert!(!standard.is_empty());
    assert!(!extensions.is_empty());
}
595}