use crate::error::ParseError;
use crate::parser::namespace_detector::NamespaceContext;
use ddex_core::models::{AttributeMap, AttributeType, AttributeValue, QName};
use indexmap::IndexMap;
use quick_xml::events::{attributes::Attribute, BytesStart};
use std::collections::HashMap;
use tracing::{debug, warn};
/// Extracts, types and classifies the XML attributes of a single element.
///
/// Both lookup tables are filled once by [`AttributeExtractor::new`] and are
/// only read afterwards.
#[derive(Debug, Clone)]
pub struct AttributeExtractor {
    /// Attribute name -> type used to parse that attribute's raw text.
    ddex_attribute_types: HashMap<String, AttributeType>,
    /// Attribute name as written in XML (e.g. `xsi:type`) -> handler that
    /// interprets its value.
    special_attributes: IndexMap<String, SpecialAttributeHandler>,
}
/// Selects how the value of a registered "special" attribute is interpreted.
///
/// The enum carries no data, so it is `Copy` and comparable; handlers can be
/// passed by value and matched with `==` in addition to `match`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SpecialAttributeHandler {
    /// `xsi:type`: a (possibly prefixed) schema type name.
    XsiType,
    /// `xsi:schemaLocation`: whitespace-separated namespace/location pairs.
    XsiSchemaLocation,
    /// `xsi:noNamespaceSchemaLocation`: a single schema location.
    XsiNoNamespaceSchemaLocation,
    /// `xsi:nil`: boolean nil marker.
    XsiNil,
    /// `xmlns`: namespace declaration; produces no special value.
    NamespaceDeclaration,
    /// `LanguageAndScriptCode` / `ApplicableTerritoryCode`.
    LanguageAndTerritory,
    /// `SequenceNumber`: non-negative ordering index.
    SequenceNumber,
    /// `IsDefault` / `IsMainArtist`: boolean flags.
    BooleanFlag,
}
/// Everything gathered from one element's attribute list.
#[derive(Debug, Clone)]
pub struct AttributeExtractionResult {
    /// Every attribute found on the element, keyed by qualified name.
    pub attributes: AttributeMap,
    /// Subset of `attributes` reported by `AttributeMap::standard_attributes`.
    pub standard_attributes: IndexMap<QName, AttributeValue>,
    /// Subset of `attributes` reported by `AttributeMap::extension_attributes`.
    pub extension_attributes: IndexMap<QName, AttributeValue>,
    /// Namespace declarations as prefix -> URI; the default namespace uses
    /// the empty string as its prefix.
    pub namespace_declarations: IndexMap<String, String>,
    /// Interpreted values for attributes that have a registered handler.
    pub special_attributes: IndexMap<QName, SpecialAttributeValue>,
    /// Human-readable warnings collected during extraction.
    pub warnings: Vec<String>,
}
/// Interpreted value of an attribute that has a registered special handler.
#[derive(Debug, Clone, PartialEq)]
pub enum SpecialAttributeValue {
    /// Value of `xsi:type`.
    XsiType {
        /// Type name with any `prefix:` removed.
        type_name: String,
        /// Namespace the prefix resolved to, when there was a prefix and it
        /// could be resolved.
        namespace_uri: Option<String>,
        /// Optional resolved type identifier.
        resolved_type: Option<String>,
    },
    /// Value of `xsi:schemaLocation`.
    SchemaLocation {
        /// Namespace URI -> schema location, in document order.
        locations: IndexMap<String, String>,
    },
    /// Value of `xsi:noNamespaceSchemaLocation`.
    NoNamespaceSchemaLocation(String),
    /// Value of `xsi:nil`.
    Nil(bool),
    /// A language tag split into its parts.
    Language {
        /// Primary language subtag.
        language: String,
        /// Optional script subtag.
        script: Option<String>,
        /// Optional territory/region subtag.
        territory: Option<String>,
    },
    /// A whitespace-separated list of territory codes.
    Territory(Vec<String>),
    /// A sequence number.
    Sequence(u32),
    /// A boolean flag.
    Flag(bool),
}
impl AttributeExtractor {
/// Creates an extractor with the built-in attribute type table and the
/// special-attribute handlers already registered.
pub fn new() -> Self {
    let mut this = Self {
        special_attributes: IndexMap::new(),
        ddex_attribute_types: HashMap::new(),
    };
    // The two tables are independent of each other, so the order in which
    // they are populated does not matter.
    this.initialize_special_handlers();
    this.initialize_ddex_attributes();
    this
}
/// Registers the type used to parse each known attribute's raw text.
fn initialize_ddex_attributes(&mut self) {
    let typed_names = [
        ("LanguageAndScriptCode", AttributeType::Language),
        ("ApplicableTerritoryCode", AttributeType::String),
        ("IsDefault", AttributeType::Boolean),
        ("IsMainArtist", AttributeType::Boolean),
        ("HasChanged", AttributeType::Boolean),
        ("SequenceNumber", AttributeType::Integer),
        ("Duration", AttributeType::String),
        ("Namespace", AttributeType::Uri),
        ("CreatedDateTime", AttributeType::DateTime),
        ("UpdatedDateTime", AttributeType::DateTime),
    ];
    for &(name, attr_type) in typed_names.iter() {
        self.ddex_attribute_types
            .insert(name.to_string(), attr_type);
    }
}
/// Registers the handler for every attribute that gets special treatment.
///
/// Keys are attribute names as written in the XML. The map is
/// insertion-ordered, so the table below keeps the registration order.
fn initialize_special_handlers(&mut self) {
    let handlers = [
        ("xsi:type", SpecialAttributeHandler::XsiType),
        (
            "xsi:schemaLocation",
            SpecialAttributeHandler::XsiSchemaLocation,
        ),
        (
            "xsi:noNamespaceSchemaLocation",
            SpecialAttributeHandler::XsiNoNamespaceSchemaLocation,
        ),
        ("xsi:nil", SpecialAttributeHandler::XsiNil),
        ("xmlns", SpecialAttributeHandler::NamespaceDeclaration),
        (
            "LanguageAndScriptCode",
            SpecialAttributeHandler::LanguageAndTerritory,
        ),
        (
            "ApplicableTerritoryCode",
            SpecialAttributeHandler::LanguageAndTerritory,
        ),
        ("SequenceNumber", SpecialAttributeHandler::SequenceNumber),
        ("IsDefault", SpecialAttributeHandler::BooleanFlag),
        ("IsMainArtist", SpecialAttributeHandler::BooleanFlag),
    ];
    for (name, handler) in handlers {
        self.special_attributes.insert(name.to_string(), handler);
    }
}
/// Extracts every attribute of `element`.
///
/// Each attribute is resolved to a qualified name against
/// `namespace_context`, parsed according to its registered type, and stored
/// in the result. Namespace declarations are additionally collected as
/// prefix -> URI (the default namespace uses the empty prefix), and
/// attributes with a registered special handler also get an interpreted
/// value.
///
/// Attributes whose namespace prefix could not be resolved are kept, and a
/// message for each one is added to the result's `warnings`.
///
/// # Errors
///
/// Returns `ParseError::XmlError` when an attribute cannot be read from the
/// element, and propagates any error from per-attribute processing.
pub fn extract_attributes(
    &self,
    element: &BytesStart,
    namespace_context: &NamespaceContext,
) -> Result<AttributeExtractionResult, ParseError> {
    let mut attributes = AttributeMap::new();
    let mut namespace_declarations = IndexMap::new();
    let mut special_attributes = IndexMap::new();
    let mut warnings = Vec::new();

    debug!(
        "Extracting attributes from element: {}",
        String::from_utf8_lossy(element.name().as_ref())
    );

    for attr_result in element.attributes() {
        let attr = attr_result
            .map_err(|e| ParseError::XmlError(format!("Failed to read attribute: {}", e)))?;
        let (qname, attr_value) = self.process_attribute(&attr, namespace_context)?;

        if qname.is_namespace_declaration() {
            // `xmlns="..."` declares the default namespace (empty prefix);
            // `xmlns:p="..."` declares prefix `p`.
            let prefix = if qname.local_name == "xmlns" {
                String::new()
            } else {
                qname.local_name.clone()
            };
            let uri = attr_value.to_xml_value();
            debug!(
                "Found namespace declaration: {}={}",
                qname.to_xml_name(),
                uri
            );
            namespace_declarations.insert(prefix, uri);
        } else if qname.prefix.is_some() && qname.namespace_uri.is_none() {
            // A prefixed name without a namespace URI is what name
            // resolution produces for a prefix it could not resolve
            // (presumably `QName::new` leaves `prefix` unset for colon-free
            // names). Report it to the caller instead of only logging it.
            warnings.push(format!(
                "Unresolved namespace prefix in attribute: {}",
                qname.to_xml_name()
            ));
        }

        if let Some(special_value) =
            self.process_special_attribute(&qname, &attr_value, namespace_context)?
        {
            special_attributes.insert(qname.clone(), special_value);
        }

        attributes.insert(qname, attr_value);
    }

    let standard_attributes = attributes.standard_attributes();
    let extension_attributes = attributes.extension_attributes();
    debug!(
        "Extracted {} total attributes ({} standard, {} extensions)",
        attributes.len(),
        standard_attributes.len(),
        extension_attributes.len()
    );

    Ok(AttributeExtractionResult {
        attributes,
        standard_attributes,
        extension_attributes,
        namespace_declarations,
        special_attributes,
        warnings,
    })
}
/// Converts one raw attribute into a qualified name and a typed value.
///
/// When the attribute has a registered type and its text fails to parse as
/// that type, the failure is logged and the text is kept as
/// `AttributeValue::Raw`. Attributes without a registered type become
/// `AttributeValue::String`.
fn process_attribute(
    &self,
    attr: &Attribute,
    namespace_context: &NamespaceContext,
) -> Result<(QName, AttributeValue), ParseError> {
    // NOTE(review): the value is decoded straight from the attribute's bytes;
    // it looks like XML entity references (e.g. `&amp;`) are not unescaped
    // here - confirm that is intended.
    let raw_name = String::from_utf8_lossy(attr.key.as_ref());
    let raw_value = String::from_utf8_lossy(&attr.value);
    debug!("Processing attribute: {}={}", raw_name, raw_value);

    let qname = self.resolve_attribute_qname(&raw_name, namespace_context);

    let parsed_value = match self.get_attribute_type(&qname) {
        None => AttributeValue::String(raw_value.to_string()),
        Some(attr_type) => match AttributeValue::parse_with_type(&raw_value, attr_type) {
            Ok(typed) => typed,
            Err(e) => {
                warn!(
                    "Failed to parse attribute {} as {:?}: {}",
                    qname, attr_type, e
                );
                AttributeValue::Raw(raw_value.to_string())
            }
        },
    };

    Ok((qname, parsed_value))
}
/// Builds the qualified name for an attribute as written in the XML.
///
/// * `prefix:local` with a resolvable prefix gets prefix and namespace URI.
/// * `prefix:local` with an unresolvable prefix keeps the prefix but has no
///   namespace URI. A warning is logged, except for the `xmlns` prefix:
///   `xmlns:p="..."` is a namespace declaration, not a reference to a
///   declared prefix.
/// * Names without a colon (including plain `xmlns`) are passed to
///   `QName::new` unchanged.
fn resolve_attribute_qname(
    &self,
    attr_name: &str,
    namespace_context: &NamespaceContext,
) -> QName {
    match attr_name.split_once(':') {
        // No colon means no prefix to resolve.
        None => QName::new(attr_name),
        Some((prefix, local_name)) => {
            match namespace_context.current_scope.resolve_prefix(prefix) {
                Some(namespace_uri) => {
                    QName::with_prefix_and_namespace(local_name, prefix, namespace_uri)
                }
                None => {
                    if prefix != "xmlns" {
                        warn!("Unresolved namespace prefix in attribute: {}", attr_name);
                    }
                    QName {
                        local_name: local_name.to_string(),
                        namespace_uri: None,
                        prefix: Some(prefix.to_string()),
                    }
                }
            }
        }
    }
}
/// Looks up the registered type for `qname`.
///
/// The full XML name is tried first, then the local name alone.
fn get_attribute_type(&self, qname: &QName) -> Option<AttributeType> {
    let table = &self.ddex_attribute_types;
    table
        .get(&qname.to_xml_name())
        .or_else(|| table.get(&qname.local_name))
        .copied()
}
/// Interprets `value` with the handler registered for `qname`'s XML name.
///
/// Returns `Ok(None)` when no handler is registered, or when the handler
/// yields no special value (as for namespace declarations).
fn process_special_attribute(
    &self,
    qname: &QName,
    value: &AttributeValue,
    namespace_context: &NamespaceContext,
) -> Result<Option<SpecialAttributeValue>, ParseError> {
    let xml_name = qname.to_xml_name();
    let handler = match self.special_attributes.get(&xml_name) {
        Some(handler) => handler,
        None => return Ok(None),
    };

    match handler {
        SpecialAttributeHandler::XsiType => self.process_xsi_type(value, namespace_context),
        SpecialAttributeHandler::XsiSchemaLocation => self.process_schema_location(value),
        SpecialAttributeHandler::XsiNoNamespaceSchemaLocation => {
            let location = value.to_xml_value();
            Ok(Some(SpecialAttributeValue::NoNamespaceSchemaLocation(
                location,
            )))
        }
        SpecialAttributeHandler::XsiNil => self.process_xsi_nil(value),
        // Namespace declarations are collected by the caller.
        SpecialAttributeHandler::NamespaceDeclaration => Ok(None),
        SpecialAttributeHandler::LanguageAndTerritory => self.process_language_territory(value),
        SpecialAttributeHandler::SequenceNumber => self.process_sequence_number(value),
        SpecialAttributeHandler::BooleanFlag => self.process_boolean_flag(value),
    }
}
/// Splits an `xsi:type` value into a type name and the namespace of its
/// prefix.
///
/// An unprefixed value is used as the type name as-is, with no namespace.
/// `resolved_type` is always left as `None` here.
fn process_xsi_type(
    &self,
    value: &AttributeValue,
    namespace_context: &NamespaceContext,
) -> Result<Option<SpecialAttributeValue>, ParseError> {
    let raw_type = value.to_xml_value();
    let (type_name, namespace_uri) = match raw_type.split_once(':') {
        Some((prefix, local)) => (
            local.to_string(),
            namespace_context.current_scope.resolve_prefix(prefix),
        ),
        None => (raw_type, None),
    };

    Ok(Some(SpecialAttributeValue::XsiType {
        type_name,
        namespace_uri,
        resolved_type: None,
    }))
}
fn process_schema_location(
&self,
value: &AttributeValue,
) -> Result<Option<SpecialAttributeValue>, ParseError> {
let location_value = value.to_xml_value();
let mut locations = IndexMap::new();
let tokens: Vec<&str> = location_value.split_whitespace().collect();
for chunk in tokens.chunks(2) {
if chunk.len() == 2 {
locations.insert(chunk[0].to_string(), chunk[1].to_string());
}
}
Ok(Some(SpecialAttributeValue::SchemaLocation { locations }))
}
/// Interprets an `xsi:nil` value.
///
/// An already-parsed boolean is used directly. Any other value is nil only
/// when its text is `true` (any letter case) or `1`.
fn process_xsi_nil(
    &self,
    value: &AttributeValue,
) -> Result<Option<SpecialAttributeValue>, ParseError> {
    let is_nil = match value {
        AttributeValue::Boolean(flag) => *flag,
        other => {
            let lowered = other.to_xml_value().to_lowercase();
            lowered == "true" || lowered == "1"
        }
    };
    Ok(Some(SpecialAttributeValue::Nil(is_nil)))
}
/// Interprets a language tag or a territory list.
///
/// * A value containing `-` is read as a language tag of the form
///   `language[-extlang][-script][-region]`. A four-letter subtag is the
///   script; a two-letter or three-digit subtag is the territory. Extended
///   language subtags (three letters) are skipped, and scanning stops at the
///   first subtag that is neither, so variants and extensions are never
///   mistaken for a territory. `zh-Hant-TW` therefore yields script `Hant`
///   and territory `TW`, and `en-US` yields territory `US` with no script.
/// * Otherwise, a value containing a space is read as a whitespace-separated
///   territory list.
/// * Any other value is returned as a bare language.
///
/// NOTE(review): the same handler serves both language and territory
/// attributes, so a single territory code such as `US` is reported as a
/// language - confirm whether callers rely on that.
fn process_language_territory(
    &self,
    value: &AttributeValue,
) -> Result<Option<SpecialAttributeValue>, ParseError> {
    let raw = value.to_xml_value();

    if raw.contains('-') {
        let mut subtags = raw.split('-');
        // `split` always yields at least one item, so the default is never used.
        let language = subtags.next().unwrap_or_default().to_string();
        let mut script = None;
        let mut territory = None;

        for subtag in subtags {
            let alphabetic =
                !subtag.is_empty() && subtag.bytes().all(|b| b.is_ascii_alphabetic());
            let numeric = !subtag.is_empty() && subtag.bytes().all(|b| b.is_ascii_digit());

            match subtag.len() {
                // Extended language subtag; only valid before the script.
                3 if alphabetic && script.is_none() => continue,
                4 if alphabetic && script.is_none() => script = Some(subtag.to_string()),
                // Region: two letters or a three-digit area code. Everything
                // after the region is a variant or extension.
                2 if alphabetic => {
                    territory = Some(subtag.to_string());
                    break;
                }
                3 if numeric => {
                    territory = Some(subtag.to_string());
                    break;
                }
                _ => break,
            }
        }

        Ok(Some(SpecialAttributeValue::Language {
            language,
            script,
            territory,
        }))
    } else if raw.contains(' ') {
        let territories: Vec<String> = raw.split_whitespace().map(str::to_string).collect();
        Ok(Some(SpecialAttributeValue::Territory(territories)))
    } else {
        Ok(Some(SpecialAttributeValue::Language {
            language: raw,
            script: None,
            territory: None,
        }))
    }
}
/// Interprets a sequence number.
///
/// Returns `Ok(None)` when the value is not a valid `u32`. That now includes
/// already-parsed integers that are negative or too large, which previously
/// wrapped silently through an `as` cast; this matches how unparseable text
/// is treated.
fn process_sequence_number(
    &self,
    value: &AttributeValue,
) -> Result<Option<SpecialAttributeValue>, ParseError> {
    let sequence = match value {
        // Fully qualified so the checked conversion resolves without relying
        // on `TryFrom` being in the prelude.
        AttributeValue::Integer(i) => <u32 as std::convert::TryFrom<_>>::try_from(*i).ok(),
        other => other.to_xml_value().parse::<u32>().ok(),
    };
    Ok(sequence.map(SpecialAttributeValue::Sequence))
}
/// Interprets a boolean flag attribute.
///
/// An already-parsed boolean is used directly. Any other value is `true`
/// only when its text is `true` (any letter case) or `1`.
fn process_boolean_flag(
    &self,
    value: &AttributeValue,
) -> Result<Option<SpecialAttributeValue>, ParseError> {
    if let AttributeValue::Boolean(flag) = value {
        return Ok(Some(SpecialAttributeValue::Flag(*flag)));
    }

    let text = value.to_xml_value().to_lowercase();
    let truthy = ["true", "1"].contains(&text.as_str());
    Ok(Some(SpecialAttributeValue::Flag(truthy)))
}
/// Applies attribute inheritance from `parent_attributes` to
/// `child_attributes`, using a freshly constructed
/// `ddex_core::models::AttributeInheritance`.
pub fn apply_inheritance(
    &self,
    parent_attributes: &AttributeMap,
    child_attributes: &mut AttributeMap,
) {
    ddex_core::models::AttributeInheritance::new()
        .apply_inheritance(parent_attributes, child_attributes);
}
/// Validates every attribute and returns one message per failure.
///
/// The result is empty when every attribute value validates.
pub fn validate_attributes(&self, attributes: &AttributeMap) -> Vec<String> {
    attributes
        .iter()
        .filter_map(|(qname, value)| {
            value
                .validate()
                .err()
                .map(|e| format!("Invalid attribute {}: {}", qname, e))
        })
        .collect()
}
}
impl Default for AttributeExtractor {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use quick_xml::Reader;
use std::io::Cursor;
// Plain (unprefixed) attributes are extracted and typed per the type table.
#[test]
fn test_attribute_extraction_basic() {
    let xml = r#"<Release title="Test Album" SequenceNumber="1" IsDefault="true" />"#;
    let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
    let mut buf = Vec::new();
    // Fail loudly if the fixture does not parse as an empty element;
    // otherwise the test would pass without checking anything.
    let start = match reader.read_event_into(&mut buf) {
        Ok(quick_xml::events::Event::Empty(start)) => start,
        other => panic!("expected an empty-element event, got {:?}", other),
    };

    let extractor = AttributeExtractor::new();
    let namespace_context = NamespaceContext {
        current_scope: ddex_core::namespace::NamespaceScope::new(),
        document_namespaces: indexmap::IndexMap::new(),
        default_namespace: None,
        ern_version: None,
    };
    let result = extractor
        .extract_attributes(&start, &namespace_context)
        .unwrap();

    assert_eq!(result.attributes.len(), 3);
    assert_eq!(
        result.attributes.get_str("title").unwrap().to_xml_value(),
        "Test Album"
    );
    assert_eq!(
        result
            .attributes
            .get_str("SequenceNumber")
            .unwrap()
            .to_xml_value(),
        "1"
    );
    assert_eq!(
        result
            .attributes
            .get_str("IsDefault")
            .unwrap()
            .to_xml_value(),
        "true"
    );

    match result.attributes.get_str("SequenceNumber") {
        Some(AttributeValue::Integer(seq)) => assert_eq!(*seq, 1),
        _ => panic!("SequenceNumber should be parsed as integer"),
    }
    match result.attributes.get_str("IsDefault") {
        Some(AttributeValue::Boolean(is_default)) => assert!(*is_default),
        _ => panic!("IsDefault should be parsed as boolean"),
    }
}
// `xmlns:prefix` declarations are collected as prefix -> URI.
#[test]
fn test_namespace_attribute_extraction() {
    let xml = r#"<ern:Release xmlns:ern="http://ddex.net/xml/ern/43"
xmlns:avs="http://ddex.net/xml/avs"
ern:title="Test" />"#;
    let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
    let mut buf = Vec::new();
    // Fail loudly if the fixture does not parse as an empty element;
    // otherwise the test would pass without checking anything.
    let start = match reader.read_event_into(&mut buf) {
        Ok(quick_xml::events::Event::Empty(start)) => start,
        other => panic!("expected an empty-element event, got {:?}", other),
    };

    let extractor = AttributeExtractor::new();
    let namespace_context = NamespaceContext {
        current_scope: ddex_core::namespace::NamespaceScope::new(),
        document_namespaces: indexmap::IndexMap::new(),
        default_namespace: None,
        ern_version: None,
    };
    let result = extractor
        .extract_attributes(&start, &namespace_context)
        .unwrap();

    assert_eq!(result.namespace_declarations.len(), 2);
    assert!(result.namespace_declarations.contains_key("ern"));
    assert!(result.namespace_declarations.contains_key("avs"));
}
// `xsi:*` attributes produce interpreted special values.
#[test]
fn test_special_attribute_processing() {
    let xml = r#"<element xsi:type="xs:string"
xsi:nil="true"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xs="http://www.w3.org/2001/XMLSchema" />"#;
    let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
    let mut buf = Vec::new();
    // Fail loudly if the fixture does not parse as an empty element;
    // otherwise the test would pass without checking anything.
    let start = match reader.read_event_into(&mut buf) {
        Ok(quick_xml::events::Event::Empty(start)) => start,
        other => panic!("expected an empty-element event, got {:?}", other),
    };

    let extractor = AttributeExtractor::new();
    let namespace_context = NamespaceContext {
        current_scope: ddex_core::namespace::NamespaceScope::new(),
        document_namespaces: indexmap::IndexMap::new(),
        default_namespace: None,
        ern_version: None,
    };
    let result = extractor
        .extract_attributes(&start, &namespace_context)
        .unwrap();

    assert!(!result.special_attributes.is_empty());
    // The key stored for `xsi:nil` depends on whether the `xsi` prefix was
    // resolvable in the supplied scope, so look the value up by variant
    // rather than by an exact qualified name that might not match.
    assert!(
        result
            .special_attributes
            .values()
            .any(|value| matches!(value, SpecialAttributeValue::Nil(true))),
        "xsi:nil=\"true\" should be reported as Nil(true)"
    );
}
// After inheritance the child has the parent's language/territory
// attributes and still has its own attribute.
#[test]
fn test_attribute_inheritance() {
    let mut child = AttributeMap::new();
    child.insert_str("title", "Child Title");

    let mut parent = AttributeMap::new();
    parent.insert_str("LanguageAndScriptCode", "en-US");
    parent.insert_str("ApplicableTerritoryCode", "Worldwide");

    AttributeExtractor::new().apply_inheritance(&parent, &mut child);

    for name in ["LanguageAndScriptCode", "ApplicableTerritoryCode", "title"] {
        assert!(child.get_str(name).is_some());
    }
}
// A mixed attribute map yields at least one standard and at least one
// extension attribute.
#[test]
fn test_ddex_standard_vs_extension_attributes() {
    let mut attributes = AttributeMap::new();
    attributes.insert_str("LanguageAndScriptCode", "en-US");
    attributes.insert_str("custom:proprietary", "custom value");
    attributes.insert_str("xmlns:custom", "http://example.com/custom");

    let standard_count = attributes.standard_attributes().len();
    let extension_count = attributes.extension_attributes().len();

    assert!(standard_count >= 1);
    assert!(extension_count >= 1);
}
}