ddex_parser/parser/
namespace_detector.rs

1//! # Namespace Detection and Management for DDEX Parser
2//!
3//! This module provides comprehensive namespace detection and storage for DDEX XML parsing,
4//! handling scope inheritance, default namespaces, and custom extensions.
5
6use crate::error::ParseError;
7use crate::utf8_utils;
8use ddex_core::models::versions::ERNVersion;
9use ddex_core::namespace::{
10    DDEXStandard, NamespaceInfo, NamespaceRegistry, NamespaceScope, NamespaceWarning,
11};
12use indexmap::IndexMap;
13use quick_xml::events::{BytesStart, Event};
14use quick_xml::Reader;
15use std::io::BufRead;
16use tracing::{debug, warn};
17
/// Stateful namespace detector for DDEX XML documents.
///
/// A detector is fed one document through `detect_from_xml` (or the
/// `_with_security` variant). While the document is read it keeps one scope per
/// open element and a flat record of every `xmlns` / `xmlns:prefix` attribute
/// it has seen. None of this state is reset between calls, so a detector that
/// is reused keeps the declarations of earlier documents.
#[derive(Debug, Clone)]
pub struct NamespaceDetector {
    /// Registry of known namespaces; used to detect the ERN version, to
    /// validate declarations and to tell known namespaces from custom ones.
    registry: NamespaceRegistry,
    /// Stack of namespace scopes for element-level inheritance. Starts with a
    /// single base scope; one child scope is pushed per opening element.
    scope_stack: Vec<NamespaceScope>,
    /// All detected namespace declarations in document order. The default
    /// namespace is stored under the empty prefix `""`; redeclaring a prefix
    /// replaces the earlier URI.
    detected_namespaces: IndexMap<String, String>, // prefix -> uri
    /// Namespace aliases found in the document: every prefix that was bound to
    /// a URI through an `xmlns:prefix` attribute.
    namespace_aliases: IndexMap<String, Vec<String>>, // uri -> [prefixes]
    /// Default namespace stack, kept in step with `scope_stack`. `None` means
    /// that no default namespace is in effect at that level.
    default_namespace_stack: Vec<Option<String>>,
    /// Detected ERN version from namespaces; the first URI the registry
    /// recognises wins.
    detected_version: Option<ERNVersion>,
    /// Warnings collected during namespace processing
    warnings: Vec<NamespaceWarning>,
}
36
/// Snapshot of everything a [`NamespaceDetector`] has collected, as produced at
/// the end of a detection run.
#[derive(Debug, Clone)]
pub struct NamespaceDetectionResult {
    /// All namespace declarations found, keyed by prefix (`""` is the default
    /// namespace), in the order the prefixes were first seen.
    pub declarations: IndexMap<String, String>,
    /// Detected ERN version, if any declared URI was recognised.
    pub version: Option<ERNVersion>,
    /// Copy of the bottom entry of the detector's scope stack, i.e. the base
    /// scope that exists before any element is opened.
    pub root_scope: NamespaceScope,
    /// Warnings about namespace usage
    pub warnings: Vec<NamespaceWarning>,
    /// URI recorded under the empty prefix in `declarations`. If the document
    /// redeclares `xmlns` on a nested element, this is the last URI seen.
    pub default_namespace: Option<String>,
    /// Declared namespaces whose URI is not known to the registry, one entry
    /// per declared prefix.
    pub custom_namespaces: Vec<NamespaceInfo>,
}
53
54impl NamespaceDetector {
55    /// Create new namespace detector
56    pub fn new() -> Self {
57        Self {
58            registry: NamespaceRegistry::new(),
59            scope_stack: vec![NamespaceScope::new()],
60            detected_namespaces: IndexMap::new(),
61            namespace_aliases: IndexMap::new(),
62            default_namespace_stack: vec![None],
63            detected_version: None,
64            warnings: Vec::new(),
65        }
66    }
67
68    /// Detect namespaces from XML content with security limits
69    pub fn detect_from_xml<R: BufRead>(
70        &mut self,
71        reader: R,
72    ) -> Result<NamespaceDetectionResult, ParseError> {
73        self.detect_from_xml_with_security(
74            reader,
75            &crate::parser::security::SecurityConfig::default(),
76        )
77    }
78
79    /// Detect namespaces from XML content with custom security config
80    pub fn detect_from_xml_with_security<R: BufRead>(
81        &mut self,
82        reader: R,
83        security_config: &crate::parser::security::SecurityConfig,
84    ) -> Result<NamespaceDetectionResult, ParseError> {
85        let mut xml_reader = Reader::from_reader(reader);
86        xml_reader.config_mut().trim_text(true);
87
88        // Configure security settings
89        xml_reader.config_mut().expand_empty_elements = false;
90        if security_config.disable_dtd {
91            // Note: quick_xml doesn't have a direct DTD disable, but we check for DTDs manually
92        }
93
94        let mut buf = Vec::new();
95        let mut depth = 0;
96        let mut entity_expansions = 0;
97
98        loop {
99            match xml_reader.read_event_into(&mut buf) {
100                Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
101                    depth += 1;
102
103                    // Check maximum nesting depth
104                    if depth > security_config.max_element_depth {
105                        return Err(ParseError::DepthLimitExceeded {
106                            depth,
107                            max: security_config.max_element_depth,
108                        });
109                    }
110
111                    self.process_start_element(e)?;
112
113                    // For empty elements, immediately decrement depth
114                    if matches!(xml_reader.read_event_into(&mut buf), Ok(Event::Empty(_))) {
115                        depth -= 1;
116                    }
117                }
118                Ok(Event::End(_)) => {
119                    self.pop_namespace_scope();
120                    depth = depth.saturating_sub(1);
121                }
122                Ok(Event::Text(ref e)) => {
123                    // Use proper UTF-8 decoding for text content
124                    let current_pos = xml_reader.buffer_position() as usize;
125                    let text = utf8_utils::decode_utf8_at_position(e, current_pos)?;
126
127                    // Check for potential entity expansions (simple heuristic)
128                    if text.contains("&") {
129                        entity_expansions += text.matches("&").count();
130                        if entity_expansions > security_config.max_entity_expansions {
131                            return Err(ParseError::SecurityViolation {
132                                message: format!(
133                                    "Entity expansions {} exceed maximum allowed {}",
134                                    entity_expansions, security_config.max_entity_expansions
135                                ),
136                            });
137                        }
138                    }
139                }
140                Ok(Event::DocType(_)) if security_config.disable_dtd => {
141                    return Err(ParseError::SecurityViolation {
142                        message: "DTD declarations are disabled for security".to_string(),
143                    });
144                }
145                Ok(Event::Eof) => break,
146                Ok(_) => {} // Ignore other events for namespace detection
147                Err(e) => {
148                    return Err(ParseError::XmlError {
149                        message: format!("XML parsing error: {}", e),
150                        location: crate::error::ErrorLocation::default(),
151                    })
152                }
153            }
154            buf.clear();
155        }
156
157        // Validate detected namespaces
158        self.validate_namespaces();
159
160        Ok(self.build_result())
161    }
162
163    /// Process a start element for namespace declarations
164    fn process_start_element(&mut self, element: &BytesStart) -> Result<(), ParseError> {
165        // Create new scope for this element
166        let current_scope = self.scope_stack.last().unwrap().clone();
167        let mut new_scope = current_scope.new_child();
168
169        // Extract namespace declarations from attributes
170        let mut _has_namespace_declarations = false;
171        let mut new_default_namespace =
172            self.default_namespace_stack.last().cloned().unwrap_or(None);
173
174        for attr_result in element.attributes() {
175            let attr = attr_result.map_err(|e| ParseError::XmlError {
176                message: format!("Attribute error: {}", e),
177                location: crate::error::ErrorLocation::default(),
178            })?;
179            // Use proper UTF-8 decoding for attribute key and value
180            let key = utf8_utils::decode_attribute_name(attr.key.as_ref(), 0)?;
181            let value = utf8_utils::decode_attribute_value(&attr.value, 0)?;
182
183            if key == "xmlns" {
184                // Default namespace declaration
185                debug!("Found default namespace declaration: {}", value);
186                new_default_namespace = Some(value.clone());
187                new_scope.declare_namespace("".to_string(), value.clone());
188                self.detected_namespaces
189                    .insert("".to_string(), value.clone());
190                _has_namespace_declarations = true;
191
192                // Try to detect ERN version
193                if let Some(version) = self.registry.detect_version(&value) {
194                    if self.detected_version.is_none() {
195                        self.detected_version = Some(version);
196                        debug!(
197                            "Detected ERN version: {:?} from namespace: {}",
198                            version, value
199                        );
200                    }
201                }
202            } else if key.starts_with("xmlns:") {
203                // Prefixed namespace declaration
204                let prefix = key.strip_prefix("xmlns:").unwrap_or("");
205                debug!("Found namespace declaration: {}={}", prefix, value);
206
207                new_scope.declare_namespace(prefix.to_string(), value.clone());
208                self.detected_namespaces
209                    .insert(prefix.to_string(), value.clone());
210                _has_namespace_declarations = true;
211
212                // Track namespace aliases
213                self.namespace_aliases
214                    .entry(value.clone())
215                    .or_default()
216                    .push(prefix.to_string());
217
218                // Try to detect ERN version
219                if let Some(version) = self.registry.detect_version(&value) {
220                    if self.detected_version.is_none() {
221                        self.detected_version = Some(version);
222                        debug!(
223                            "Detected ERN version: {:?} from namespace: {}",
224                            version, value
225                        );
226                    }
227                }
228            }
229        }
230
231        // Push new scope and default namespace
232        self.scope_stack.push(new_scope);
233        self.default_namespace_stack.push(new_default_namespace);
234
235        Ok(())
236    }
237
238    /// Pop namespace scope when closing an element
239    fn pop_namespace_scope(&mut self) {
240        if self.scope_stack.len() > 1 {
241            self.scope_stack.pop();
242        }
243        if self.default_namespace_stack.len() > 1 {
244            self.default_namespace_stack.pop();
245        }
246    }
247
248    /// Validate detected namespaces against known standards
249    fn validate_namespaces(&mut self) {
250        let validation_warnings = self
251            .registry
252            .validate_declarations(&self.detected_namespaces);
253        self.warnings.extend(validation_warnings);
254    }
255
256    /// Build the final detection result
257    fn build_result(&self) -> NamespaceDetectionResult {
258        // Identify custom namespaces
259        let mut custom_namespaces = Vec::new();
260        for (prefix, uri) in &self.detected_namespaces {
261            if self.registry.get_namespace_info(uri).is_none() {
262                // This is a custom namespace
263                let custom_info = NamespaceInfo {
264                    uri: uri.clone(),
265                    preferred_prefix: prefix.clone(),
266                    alternative_prefixes: self
267                        .namespace_aliases
268                        .get(uri)
269                        .cloned()
270                        .unwrap_or_default()
271                        .into_iter()
272                        .filter(|p| p != prefix)
273                        .collect(),
274                    standard: DDEXStandard::Custom("Unknown".to_string()),
275                    version: None,
276                    required: false,
277                };
278                custom_namespaces.push(custom_info);
279            }
280        }
281
282        NamespaceDetectionResult {
283            declarations: self.detected_namespaces.clone(),
284            version: self.detected_version,
285            root_scope: self.scope_stack.first().cloned().unwrap_or_default(),
286            warnings: self.warnings.clone(),
287            default_namespace: self.detected_namespaces.get("").cloned(),
288            custom_namespaces,
289        }
290    }
291
292    /// Get current namespace scope
293    pub fn current_scope(&self) -> &NamespaceScope {
294        self.scope_stack.last().unwrap()
295    }
296
297    /// Resolve a prefix to its URI in current scope
298    pub fn resolve_prefix(&self, prefix: &str) -> Option<String> {
299        self.current_scope().resolve_prefix(prefix)
300    }
301
302    /// Get default namespace in current scope
303    pub fn get_default_namespace(&self) -> Option<&String> {
304        self.default_namespace_stack.last().unwrap().as_ref()
305    }
306
307    /// Check if a namespace is declared in current scope
308    pub fn is_namespace_declared(&self, uri: &str) -> bool {
309        self.current_scope().is_namespace_declared(uri)
310    }
311
312    /// Find prefix for a namespace URI in current scope
313    pub fn find_prefix_for_uri(&self, uri: &str) -> Option<String> {
314        self.current_scope().find_prefix_for_uri(uri)
315    }
316
317    /// Add a warning
318    pub fn add_warning(&mut self, warning: NamespaceWarning) {
319        warn!("Namespace warning: {}", warning);
320        self.warnings.push(warning);
321    }
322
323    /// Get detected ERN version
324    pub fn get_detected_version(&self) -> Option<ERNVersion> {
325        self.detected_version
326    }
327
328    /// Get all detected namespace declarations
329    pub fn get_detected_namespaces(&self) -> &IndexMap<String, String> {
330        &self.detected_namespaces
331    }
332
333    /// Get namespace aliases
334    pub fn get_namespace_aliases(&self) -> &IndexMap<String, Vec<String>> {
335        &self.namespace_aliases
336    }
337}
338
/// Namespace context for maintaining state during parsing.
///
/// Usually built from a [`NamespaceDetectionResult`]; child contexts for nested
/// elements are derived with `create_child`.
#[derive(Debug, Clone)]
pub struct NamespaceContext {
    /// Current namespace scope; `declare_namespace` writes into it and
    /// `create_child` derives the child's scope from it.
    pub current_scope: NamespaceScope,
    /// Detected namespaces at document level (prefix -> URI); consulted by
    /// `resolve_element_name` for prefixed names.
    pub document_namespaces: IndexMap<String, String>,
    /// Current default namespace; applied by `resolve_element_name` to names
    /// that carry no prefix.
    pub default_namespace: Option<String>,
    /// Detected ERN version
    pub ern_version: Option<ERNVersion>,
}
351
352impl NamespaceContext {
353    /// Create new namespace context from detection result
354    pub fn from_detection_result(result: NamespaceDetectionResult) -> Self {
355        Self {
356            current_scope: result.root_scope,
357            document_namespaces: result.declarations,
358            default_namespace: result.default_namespace,
359            ern_version: result.version,
360        }
361    }
362
363    /// Create a new child context for nested elements
364    pub fn create_child(&self) -> Self {
365        Self {
366            current_scope: self.current_scope.new_child(),
367            document_namespaces: self.document_namespaces.clone(),
368            default_namespace: self.default_namespace.clone(),
369            ern_version: self.ern_version,
370        }
371    }
372
373    /// Declare a namespace in current scope
374    pub fn declare_namespace(&mut self, prefix: String, uri: String) {
375        self.current_scope.declare_namespace(prefix, uri);
376    }
377
378    /// Resolve element name with namespace
379    pub fn resolve_element_name(&self, local_name: &str, prefix: Option<&str>) -> ResolvedName {
380        match prefix {
381            Some(p) => {
382                if let Some(uri) = self.document_namespaces.get(p) {
383                    ResolvedName::Qualified {
384                        local_name: local_name.to_string(),
385                        namespace_uri: uri.clone(),
386                        prefix: p.to_string(),
387                    }
388                } else {
389                    ResolvedName::Unresolved {
390                        local_name: local_name.to_string(),
391                        prefix: Some(p.to_string()),
392                    }
393                }
394            }
395            None => {
396                // Use default namespace if available
397                if let Some(uri) = &self.default_namespace {
398                    ResolvedName::Qualified {
399                        local_name: local_name.to_string(),
400                        namespace_uri: uri.clone(),
401                        prefix: "".to_string(),
402                    }
403                } else {
404                    ResolvedName::Unqualified {
405                        local_name: local_name.to_string(),
406                    }
407                }
408            }
409        }
410    }
411}
412
/// Resolved element or attribute name.
///
/// All payloads are plain strings, so values can be compared for full equality
/// and used as keys in hashed collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ResolvedName {
    /// Fully qualified name with namespace
    Qualified {
        /// Name without its prefix.
        local_name: String,
        /// URI the prefix (or the default namespace) is bound to.
        namespace_uri: String,
        /// Prefix used in the document; empty for the default namespace.
        prefix: String,
    },
    /// Unqualified name (no namespace)
    Unqualified {
        /// Name as written in the document.
        local_name: String,
    },
    /// Unresolved prefix: the name carried a prefix with no known binding
    Unresolved {
        /// Name without its prefix.
        local_name: String,
        /// The prefix that could not be resolved.
        prefix: Option<String>,
    },
}
430
431impl Default for NamespaceDetector {
432    fn default() -> Self {
433        Self::new()
434    }
435}
436
// Unit tests: each one feeds a small in-memory DDEX document to a fresh
// detector and checks the resulting `NamespaceDetectionResult`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Prefixed declarations on the root element are all recorded and the
    /// `ern` URI is mapped to ERN 4.3.
    #[test]
    fn test_namespace_detection_ern_43() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43" 
                       xmlns:avs="http://ddex.net/xml/avs"
                       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <ern:MessageHeader>
        <ern:MessageId>MSG001</ern:MessageId>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#;

        let mut detector = NamespaceDetector::new();
        let cursor = Cursor::new(xml.as_bytes());
        let result = detector.detect_from_xml(cursor).unwrap();

        assert_eq!(result.version, Some(ERNVersion::V4_3));
        assert!(result.declarations.contains_key("ern"));
        assert!(result.declarations.contains_key("avs"));
        assert!(result.declarations.contains_key("xsi"));
        assert_eq!(
            result.declarations.get("ern"),
            Some(&"http://ddex.net/xml/ern/43".to_string())
        );
    }

    /// A plain `xmlns` attribute is stored under the empty prefix, reported as
    /// the default namespace and used for version detection (ERN 4.2 here).
    #[test]
    fn test_default_namespace_detection() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<NewReleaseMessage xmlns="http://ddex.net/xml/ern/42" 
                   xmlns:avs="http://ddex.net/xml/avs">
    <MessageHeader>
        <MessageId>MSG001</MessageId>
    </MessageHeader>
</NewReleaseMessage>"#;

        let mut detector = NamespaceDetector::new();
        let cursor = Cursor::new(xml.as_bytes());
        let result = detector.detect_from_xml(cursor).unwrap();

        assert_eq!(result.version, Some(ERNVersion::V4_2));
        assert_eq!(
            result.default_namespace,
            Some("http://ddex.net/xml/ern/42".to_string())
        );
        assert!(result.declarations.contains_key(""));
    }

    /// A namespace URI that the registry does not know is reported as the only
    /// custom namespace, with its declared prefix as preferred prefix.
    #[test]
    fn test_custom_namespace_detection() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43" 
                       xmlns:custom="http://example.com/custom">
    <ern:MessageHeader>
        <custom:CustomElement>Test</custom:CustomElement>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#;

        let mut detector = NamespaceDetector::new();
        let cursor = Cursor::new(xml.as_bytes());
        let result = detector.detect_from_xml(cursor).unwrap();

        assert_eq!(result.custom_namespaces.len(), 1);
        assert_eq!(result.custom_namespaces[0].uri, "http://example.com/custom");
        assert_eq!(result.custom_namespaces[0].preferred_prefix, "custom");
    }

    /// A declaration on a nested element (`xmlns:local` on the header) must be
    /// recorded alongside the one on the root; the document also contains a
    /// self-closing element.
    #[test]
    fn test_namespace_scope_inheritance() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
    <ern:MessageHeader xmlns:local="http://example.com/local">
        <local:LocalElement>
            <ern:ErnElement />
        </local:LocalElement>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#;

        let mut detector = NamespaceDetector::new();
        let cursor = Cursor::new(xml.as_bytes());
        let result = detector.detect_from_xml(cursor).unwrap();

        // Both namespaces should be detected
        assert!(result.declarations.contains_key("ern"));
        assert!(result.declarations.contains_key("local"));
    }

    /// A context built from a detection result resolves a prefixed element
    /// name to a fully qualified name.
    #[test]
    fn test_namespace_context() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43" 
                       xmlns:avs="http://ddex.net/xml/avs">
</ern:NewReleaseMessage>"#;

        let mut detector = NamespaceDetector::new();
        let cursor = Cursor::new(xml.as_bytes());
        let result = detector.detect_from_xml(cursor).unwrap();

        let context = NamespaceContext::from_detection_result(result);

        let resolved = context.resolve_element_name("MessageHeader", Some("ern"));
        match resolved {
            ResolvedName::Qualified {
                local_name,
                namespace_uri,
                prefix,
            } => {
                assert_eq!(local_name, "MessageHeader");
                assert_eq!(namespace_uri, "http://ddex.net/xml/ern/43");
                assert_eq!(prefix, "ern");
            }
            _ => panic!("Expected qualified name"),
        }
    }
}