use crate::error::ParseError;
use crate::utf8_utils;
use ddex_core::models::versions::ERNVersion;
use ddex_core::namespace::{
DDEXStandard, NamespaceInfo, NamespaceRegistry, NamespaceScope, NamespaceWarning,
};
use indexmap::IndexMap;
use quick_xml::events::{BytesStart, Event};
use quick_xml::Reader;
use std::io::BufRead;
use tracing::{debug, warn};
/// Stateful scanner that collects the XML namespace declarations of a document
/// while tracking the scopes opened and closed by its elements.
#[derive(Debug, Clone)]
pub struct NamespaceDetector {
    /// Registry consulted for version detection, declaration validation and
    /// lookups of known namespace URIs.
    registry: NamespaceRegistry,
    /// Stack of namespace scopes: one entry is pushed per processed start
    /// element and popped when it closes. Starts with a single base scope.
    scope_stack: Vec<NamespaceScope>,
    /// Prefix -> URI for every declaration seen. The default namespace is
    /// stored under the empty prefix; a later declaration of the same prefix
    /// overwrites the earlier one.
    detected_namespaces: IndexMap<String, String>,
    /// URI -> prefixes that were bound to it through `xmlns:prefix` attributes.
    namespace_aliases: IndexMap<String, Vec<String>>,
    /// Default namespace in effect at each nesting level, kept in step with
    /// `scope_stack`. Starts with a single `None` entry.
    default_namespace_stack: Vec<Option<String>>,
    /// First ERN version the registry recognised from a declared URI.
    detected_version: Option<ERNVersion>,
    /// Warnings accumulated by validation and by `add_warning`.
    warnings: Vec<NamespaceWarning>,
}
/// Snapshot of everything a [`NamespaceDetector`] learned about a document.
#[derive(Debug, Clone)]
pub struct NamespaceDetectionResult {
    /// Prefix -> URI for every declaration seen (empty prefix = default namespace).
    pub declarations: IndexMap<String, String>,
    /// ERN version detected from the declared namespace URIs, if any.
    pub version: Option<ERNVersion>,
    /// The detector's bottom-most scope at the time the result was built.
    pub root_scope: NamespaceScope,
    /// Warnings collected during detection and validation.
    pub warnings: Vec<NamespaceWarning>,
    /// URI recorded for the empty prefix, if a default namespace was declared.
    pub default_namespace: Option<String>,
    /// One entry per declared URI for which the registry has no information.
    pub custom_namespaces: Vec<NamespaceInfo>,
}
impl NamespaceDetector {
pub fn new() -> Self {
Self {
registry: NamespaceRegistry::new(),
scope_stack: vec![NamespaceScope::new()],
detected_namespaces: IndexMap::new(),
namespace_aliases: IndexMap::new(),
default_namespace_stack: vec![None],
detected_version: None,
warnings: Vec::new(),
}
}
pub fn detect_from_xml<R: BufRead>(
&mut self,
reader: R,
) -> Result<NamespaceDetectionResult, ParseError> {
self.detect_from_xml_with_security(
reader,
&crate::parser::security::SecurityConfig::default(),
)
}
pub fn detect_from_xml_with_security<R: BufRead>(
&mut self,
reader: R,
security_config: &crate::parser::security::SecurityConfig,
) -> Result<NamespaceDetectionResult, ParseError> {
let mut xml_reader = Reader::from_reader(reader);
xml_reader.config_mut().trim_text(true);
xml_reader.config_mut().expand_empty_elements = false;
if security_config.disable_dtd {
}
let mut buf = Vec::new();
let mut depth = 0;
let mut entity_expansions = 0;
loop {
match xml_reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) => {
depth += 1;
if depth > security_config.max_element_depth {
return Err(ParseError::DepthLimitExceeded {
depth,
limit: security_config.max_element_depth,
});
}
self.process_start_element(e)?;
}
Ok(Event::Empty(ref e)) => {
depth += 1;
if depth > security_config.max_element_depth {
return Err(ParseError::DepthLimitExceeded {
depth,
limit: security_config.max_element_depth,
});
}
self.process_start_element(e)?;
self.pop_namespace_scope();
depth -= 1;
}
Ok(Event::End(_)) => {
self.pop_namespace_scope();
depth = depth.saturating_sub(1);
}
Ok(Event::Text(ref e)) => {
let current_pos = xml_reader.buffer_position() as usize;
let text = utf8_utils::decode_utf8_at_position(e, current_pos)?;
if text.contains("&") {
entity_expansions += text.matches("&").count();
if entity_expansions > security_config.max_entity_expansions {
return Err(ParseError::SecurityViolation {
message: format!(
"Entity expansions {} exceed maximum allowed {}",
entity_expansions, security_config.max_entity_expansions
),
});
}
}
}
Ok(Event::DocType(_)) if security_config.disable_dtd => {
return Err(ParseError::SecurityViolation {
message: "DTD declarations are disabled for security".to_string(),
});
}
Ok(Event::Eof) => break,
Ok(_) => {} Err(e) => {
return Err(ParseError::XmlError(format!("XML parsing error: {}", e)));
}
}
buf.clear();
}
self.validate_namespaces();
Ok(self.build_result())
}
fn process_start_element(&mut self, element: &BytesStart) -> Result<(), ParseError> {
let current_scope = self.scope_stack.last().unwrap().clone();
let mut new_scope = current_scope.new_child();
let mut _has_namespace_declarations = false;
let mut new_default_namespace =
self.default_namespace_stack.last().cloned().unwrap_or(None);
for attr_result in element.attributes() {
let attr = attr_result.map_err(|e| ParseError::XmlError(format!("Attribute error: {}", e)))?;
let key = utf8_utils::decode_attribute_name(attr.key.as_ref(), 0)?;
let value = utf8_utils::decode_attribute_value(&attr.value, 0)?;
if key == "xmlns" {
debug!("Found default namespace declaration: {}", value);
new_default_namespace = Some(value.clone());
new_scope.declare_namespace("".to_string(), value.clone());
self.detected_namespaces
.insert("".to_string(), value.clone());
_has_namespace_declarations = true;
if let Some(version) = self.registry.detect_version(&value) {
if self.detected_version.is_none() {
self.detected_version = Some(version);
debug!(
"Detected ERN version: {:?} from namespace: {}",
version, value
);
}
}
} else if key.starts_with("xmlns:") {
let prefix = key.strip_prefix("xmlns:").unwrap_or("");
debug!("Found namespace declaration: {}={}", prefix, value);
new_scope.declare_namespace(prefix.to_string(), value.clone());
self.detected_namespaces
.insert(prefix.to_string(), value.clone());
_has_namespace_declarations = true;
self.namespace_aliases
.entry(value.clone())
.or_default()
.push(prefix.to_string());
if let Some(version) = self.registry.detect_version(&value) {
if self.detected_version.is_none() {
self.detected_version = Some(version);
debug!(
"Detected ERN version: {:?} from namespace: {}",
version, value
);
}
}
}
}
self.scope_stack.push(new_scope);
self.default_namespace_stack.push(new_default_namespace);
Ok(())
}
fn pop_namespace_scope(&mut self) {
if self.scope_stack.len() > 1 {
self.scope_stack.pop();
}
if self.default_namespace_stack.len() > 1 {
self.default_namespace_stack.pop();
}
}
fn validate_namespaces(&mut self) {
let validation_warnings = self
.registry
.validate_declarations(&self.detected_namespaces);
self.warnings.extend(validation_warnings);
}
fn build_result(&self) -> NamespaceDetectionResult {
let mut custom_namespaces = Vec::new();
for (prefix, uri) in &self.detected_namespaces {
if self.registry.get_namespace_info(uri).is_none() {
let custom_info = NamespaceInfo {
uri: uri.clone(),
preferred_prefix: prefix.clone(),
alternative_prefixes: self
.namespace_aliases
.get(uri)
.cloned()
.unwrap_or_default()
.into_iter()
.filter(|p| p != prefix)
.collect(),
standard: DDEXStandard::Custom("Unknown".to_string()),
version: None,
required: false,
};
custom_namespaces.push(custom_info);
}
}
NamespaceDetectionResult {
declarations: self.detected_namespaces.clone(),
version: self.detected_version,
root_scope: self.scope_stack.first().cloned().unwrap_or_default(),
warnings: self.warnings.clone(),
default_namespace: self.detected_namespaces.get("").cloned(),
custom_namespaces,
}
}
pub fn current_scope(&self) -> &NamespaceScope {
self.scope_stack.last().unwrap()
}
pub fn resolve_prefix(&self, prefix: &str) -> Option<String> {
self.current_scope().resolve_prefix(prefix)
}
pub fn get_default_namespace(&self) -> Option<&String> {
self.default_namespace_stack.last().unwrap().as_ref()
}
pub fn is_namespace_declared(&self, uri: &str) -> bool {
self.current_scope().is_namespace_declared(uri)
}
pub fn find_prefix_for_uri(&self, uri: &str) -> Option<String> {
self.current_scope().find_prefix_for_uri(uri)
}
pub fn add_warning(&mut self, warning: NamespaceWarning) {
warn!("Namespace warning: {}", warning);
self.warnings.push(warning);
}
pub fn get_detected_version(&self) -> Option<ERNVersion> {
self.detected_version
}
pub fn get_detected_namespaces(&self) -> &IndexMap<String, String> {
&self.detected_namespaces
}
pub fn get_namespace_aliases(&self) -> &IndexMap<String, Vec<String>> {
&self.namespace_aliases
}
}
#[derive(Debug, Clone)]
pub struct NamespaceContext {
pub current_scope: NamespaceScope,
pub document_namespaces: IndexMap<String, String>,
pub default_namespace: Option<String>,
pub ern_version: Option<ERNVersion>,
}
impl NamespaceContext {
pub fn from_detection_result(result: NamespaceDetectionResult) -> Self {
Self {
current_scope: result.root_scope,
document_namespaces: result.declarations,
default_namespace: result.default_namespace,
ern_version: result.version,
}
}
pub fn create_child(&self) -> Self {
Self {
current_scope: self.current_scope.new_child(),
document_namespaces: self.document_namespaces.clone(),
default_namespace: self.default_namespace.clone(),
ern_version: self.ern_version,
}
}
pub fn declare_namespace(&mut self, prefix: String, uri: String) {
self.current_scope.declare_namespace(prefix, uri);
}
pub fn resolve_element_name(&self, local_name: &str, prefix: Option<&str>) -> ResolvedName {
match prefix {
Some(p) => {
if let Some(uri) = self.document_namespaces.get(p) {
ResolvedName::Qualified {
local_name: local_name.to_string(),
namespace_uri: uri.clone(),
prefix: p.to_string(),
}
} else {
ResolvedName::Unresolved {
local_name: local_name.to_string(),
prefix: Some(p.to_string()),
}
}
}
None => {
if let Some(uri) = &self.default_namespace {
ResolvedName::Qualified {
local_name: local_name.to_string(),
namespace_uri: uri.clone(),
prefix: "".to_string(),
}
} else {
ResolvedName::Unqualified {
local_name: local_name.to_string(),
}
}
}
}
}
}
/// Outcome of resolving an element name against namespace information.
#[derive(Debug, Clone, PartialEq)]
pub enum ResolvedName {
    /// The name belongs to a known namespace.
    Qualified {
        /// Element name without its prefix.
        local_name: String,
        /// URI of the namespace the name resolved to.
        namespace_uri: String,
        /// Prefix used for the lookup; empty for the default namespace.
        prefix: String,
    },
    /// The name has no prefix and no default namespace applies.
    Unqualified {
        /// Element name.
        local_name: String,
    },
    /// The name carries a prefix that could not be resolved.
    Unresolved {
        /// Element name without its prefix.
        local_name: String,
        /// The prefix that had no binding.
        prefix: Option<String>,
    },
}
impl Default for NamespaceDetector {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Runs a fresh detector with default security settings over `xml`,
    /// panicking if detection fails.
    fn detect(xml: &str) -> NamespaceDetectionResult {
        NamespaceDetector::new()
            .detect_from_xml(Cursor::new(xml.as_bytes()))
            .unwrap()
    }

    /// Prefixed declarations on the root element are recorded and the ERN 4.3
    /// version is detected from the `ern` URI.
    #[test]
    fn test_namespace_detection_ern_43() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43"
xmlns:avs="http://ddex.net/xml/avs"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ern:MessageHeader>
<ern:MessageId>MSG001</ern:MessageId>
</ern:MessageHeader>
</ern:NewReleaseMessage>"#,
        );

        assert_eq!(result.version, Some(ERNVersion::V4_3));
        for prefix in ["ern", "avs", "xsi"] {
            assert!(result.declarations.contains_key(prefix));
        }
        assert_eq!(
            result.declarations.get("ern").map(String::as_str),
            Some("http://ddex.net/xml/ern/43")
        );
    }

    /// A default (`xmlns`) declaration is stored under the empty prefix and
    /// drives version detection.
    #[test]
    fn test_default_namespace_detection() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<NewReleaseMessage xmlns="http://ddex.net/xml/ern/42"
xmlns:avs="http://ddex.net/xml/avs">
<MessageHeader>
<MessageId>MSG001</MessageId>
</MessageHeader>
</NewReleaseMessage>"#,
        );

        assert_eq!(result.version, Some(ERNVersion::V4_2));
        assert_eq!(
            result.default_namespace.as_deref(),
            Some("http://ddex.net/xml/ern/42")
        );
        assert!(result.declarations.contains_key(""));
    }

    /// A URI unknown to the registry is reported as a custom namespace.
    #[test]
    fn test_custom_namespace_detection() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43"
xmlns:custom="http://example.com/custom">
<ern:MessageHeader>
<custom:CustomElement>Test</custom:CustomElement>
</ern:MessageHeader>
</ern:NewReleaseMessage>"#,
        );

        assert_eq!(result.custom_namespaces.len(), 1);
        let custom = &result.custom_namespaces[0];
        assert_eq!(custom.uri, "http://example.com/custom");
        assert_eq!(custom.preferred_prefix, "custom");
    }

    /// Declarations made on nested elements are recorded alongside those of
    /// the root element.
    #[test]
    fn test_namespace_scope_inheritance() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
<ern:MessageHeader xmlns:local="http://example.com/local">
<local:LocalElement>
<ern:ErnElement />
</local:LocalElement>
</ern:MessageHeader>
</ern:NewReleaseMessage>"#,
        );

        assert!(result.declarations.contains_key("ern"));
        assert!(result.declarations.contains_key("local"));
    }

    /// A context built from a detection result resolves a declared prefix to a
    /// qualified name.
    #[test]
    fn test_namespace_context() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43"
xmlns:avs="http://ddex.net/xml/avs">
</ern:NewReleaseMessage>"#,
        );
        let context = NamespaceContext::from_detection_result(result);

        let ResolvedName::Qualified {
            local_name,
            namespace_uri,
            prefix,
        } = context.resolve_element_name("MessageHeader", Some("ern"))
        else {
            panic!("Expected qualified name");
        };
        assert_eq!(local_name, "MessageHeader");
        assert_eq!(namespace_uri, "http://ddex.net/xml/ern/43");
        assert_eq!(prefix, "ern");
    }
}