ddex_parser/parser/
namespace_detector.rs

1//! # Namespace Detection and Management for DDEX Parser
2//!
3//! This module provides comprehensive namespace detection and storage for DDEX XML parsing,
4//! handling scope inheritance, default namespaces, and custom extensions.
5
6use crate::error::ParseError;
7use crate::utf8_utils;
8use ddex_core::models::versions::ERNVersion;
9use ddex_core::namespace::{
10    DDEXStandard, NamespaceInfo, NamespaceRegistry, NamespaceScope, NamespaceWarning,
11};
12use indexmap::IndexMap;
13use quick_xml::events::{BytesStart, Event};
14use quick_xml::Reader;
15use std::io::BufRead;
16use tracing::{debug, warn};
17
/// Comprehensive namespace detection and management.
///
/// The detector is driven by XML start/end events: each start element pushes a
/// new entry onto both stacks, each end element pops one, and every
/// `xmlns` / `xmlns:prefix` attribute seen along the way is recorded.
#[derive(Debug, Clone)]
pub struct NamespaceDetector {
    /// Registry of known namespaces; consulted for ERN version detection,
    /// declaration validation and for telling known URIs from custom ones.
    registry: NamespaceRegistry,
    /// Stack of namespace scopes for element-level inheritance.
    /// Seeded with one root scope on construction; popping never removes
    /// that last entry.
    scope_stack: Vec<NamespaceScope>,
    /// All detected namespace declarations in document order, keyed by prefix.
    /// The default namespace is stored under the empty-string prefix.
    detected_namespaces: IndexMap<String, String>, // prefix -> uri
    /// Namespace aliases found in the document. Only prefixed declarations
    /// (`xmlns:prefix`) contribute entries here.
    namespace_aliases: IndexMap<String, Vec<String>>, // uri -> [prefixes]
    /// Default namespace stack, kept in lock-step with `scope_stack`
    /// (one entry per open element plus the initial `None`).
    default_namespace_stack: Vec<Option<String>>,
    /// Detected ERN version from namespaces; the first URI the registry
    /// recognises wins and later matches are ignored.
    detected_version: Option<ERNVersion>,
    /// Warnings collected during namespace processing, both from registry
    /// validation and from explicit `add_warning` calls.
    warnings: Vec<NamespaceWarning>,
}
36
/// Namespace detection result.
///
/// A snapshot of everything a [`NamespaceDetector`] accumulated while
/// scanning a document.
#[derive(Debug, Clone)]
pub struct NamespaceDetectionResult {
    /// All namespace declarations found, as a prefix -> URI map
    /// (the default namespace uses the empty-string prefix).
    pub declarations: IndexMap<String, String>,
    /// Detected ERN version, if any declared URI was recognised.
    pub version: Option<ERNVersion>,
    /// Namespace scope tree: the bottom entry of the detector's scope stack.
    /// NOTE(review): declarations are made on per-element child scopes, so
    /// this scope may not itself contain the root element's declarations —
    /// confirm against `NamespaceScope::new_child`.
    pub root_scope: NamespaceScope,
    /// Warnings about namespace usage.
    pub warnings: Vec<NamespaceWarning>,
    /// Default namespace: the URI stored under the empty prefix in
    /// `declarations`, if any.
    /// NOTE(review): this is read from the flattened declaration map, not
    /// specifically from the root element — confirm that is intended.
    pub default_namespace: Option<String>,
    /// Custom namespaces detected: one entry per declaration whose URI the
    /// registry has no information about.
    pub custom_namespaces: Vec<NamespaceInfo>,
}
53
54impl NamespaceDetector {
55    /// Create new namespace detector
56    pub fn new() -> Self {
57        Self {
58            registry: NamespaceRegistry::new(),
59            scope_stack: vec![NamespaceScope::new()],
60            detected_namespaces: IndexMap::new(),
61            namespace_aliases: IndexMap::new(),
62            default_namespace_stack: vec![None],
63            detected_version: None,
64            warnings: Vec::new(),
65        }
66    }
67
68    /// Detect namespaces from XML content with security limits
69    pub fn detect_from_xml<R: BufRead>(
70        &mut self,
71        reader: R,
72    ) -> Result<NamespaceDetectionResult, ParseError> {
73        self.detect_from_xml_with_security(
74            reader,
75            &crate::parser::security::SecurityConfig::default(),
76        )
77    }
78
79    /// Detect namespaces from XML content with custom security config
80    pub fn detect_from_xml_with_security<R: BufRead>(
81        &mut self,
82        reader: R,
83        security_config: &crate::parser::security::SecurityConfig,
84    ) -> Result<NamespaceDetectionResult, ParseError> {
85        let mut xml_reader = Reader::from_reader(reader);
86        xml_reader.config_mut().trim_text(true);
87
88        // Configure security settings
89        xml_reader.config_mut().expand_empty_elements = false;
90        if security_config.disable_dtd {
91            // Note: quick_xml doesn't have a direct DTD disable, but we check for DTDs manually
92        }
93
94        let mut buf = Vec::new();
95        let mut depth = 0;
96        let mut entity_expansions = 0;
97
98        loop {
99            match xml_reader.read_event_into(&mut buf) {
100                Ok(Event::Start(ref e)) => {
101                    depth += 1;
102
103                    // Check maximum nesting depth
104                    if depth > security_config.max_element_depth {
105                        return Err(ParseError::DepthLimitExceeded {
106                            depth,
107                            max: security_config.max_element_depth,
108                        });
109                    }
110
111                    self.process_start_element(e)?;
112                }
113                Ok(Event::Empty(ref e)) => {
114                    depth += 1;
115
116                    // Check maximum nesting depth
117                    if depth > security_config.max_element_depth {
118                        return Err(ParseError::DepthLimitExceeded {
119                            depth,
120                            max: security_config.max_element_depth,
121                        });
122                    }
123
124                    self.process_start_element(e)?;
125
126                    // For empty elements, immediately pop scope and decrement depth
127                    self.pop_namespace_scope();
128                    depth -= 1;
129                }
130                Ok(Event::End(_)) => {
131                    self.pop_namespace_scope();
132                    depth = depth.saturating_sub(1);
133                }
134                Ok(Event::Text(ref e)) => {
135                    // Use proper UTF-8 decoding for text content
136                    let current_pos = xml_reader.buffer_position() as usize;
137                    let text = utf8_utils::decode_utf8_at_position(e, current_pos)?;
138
139                    // Check for potential entity expansions (simple heuristic)
140                    if text.contains("&") {
141                        entity_expansions += text.matches("&").count();
142                        if entity_expansions > security_config.max_entity_expansions {
143                            return Err(ParseError::SecurityViolation {
144                                message: format!(
145                                    "Entity expansions {} exceed maximum allowed {}",
146                                    entity_expansions, security_config.max_entity_expansions
147                                ),
148                            });
149                        }
150                    }
151                }
152                Ok(Event::DocType(_)) if security_config.disable_dtd => {
153                    return Err(ParseError::SecurityViolation {
154                        message: "DTD declarations are disabled for security".to_string(),
155                    });
156                }
157                Ok(Event::Eof) => break,
158                Ok(_) => {} // Ignore other events for namespace detection
159                Err(e) => {
160                    return Err(ParseError::XmlError {
161                        message: format!("XML parsing error: {}", e),
162                        location: crate::error::ErrorLocation::default(),
163                    })
164                }
165            }
166            buf.clear();
167        }
168
169        // Validate detected namespaces
170        self.validate_namespaces();
171
172        Ok(self.build_result())
173    }
174
175    /// Process a start element for namespace declarations
176    fn process_start_element(&mut self, element: &BytesStart) -> Result<(), ParseError> {
177        // Create new scope for this element
178        let current_scope = self.scope_stack.last().unwrap().clone();
179        let mut new_scope = current_scope.new_child();
180
181        // Extract namespace declarations from attributes
182        let mut _has_namespace_declarations = false;
183        let mut new_default_namespace =
184            self.default_namespace_stack.last().cloned().unwrap_or(None);
185
186        for attr_result in element.attributes() {
187            let attr = attr_result.map_err(|e| ParseError::XmlError {
188                message: format!("Attribute error: {}", e),
189                location: crate::error::ErrorLocation::default(),
190            })?;
191            // Use proper UTF-8 decoding for attribute key and value
192            let key = utf8_utils::decode_attribute_name(attr.key.as_ref(), 0)?;
193            let value = utf8_utils::decode_attribute_value(&attr.value, 0)?;
194
195            if key == "xmlns" {
196                // Default namespace declaration
197                debug!("Found default namespace declaration: {}", value);
198                new_default_namespace = Some(value.clone());
199                new_scope.declare_namespace("".to_string(), value.clone());
200                self.detected_namespaces
201                    .insert("".to_string(), value.clone());
202                _has_namespace_declarations = true;
203
204                // Try to detect ERN version
205                if let Some(version) = self.registry.detect_version(&value) {
206                    if self.detected_version.is_none() {
207                        self.detected_version = Some(version);
208                        debug!(
209                            "Detected ERN version: {:?} from namespace: {}",
210                            version, value
211                        );
212                    }
213                }
214            } else if key.starts_with("xmlns:") {
215                // Prefixed namespace declaration
216                let prefix = key.strip_prefix("xmlns:").unwrap_or("");
217                debug!("Found namespace declaration: {}={}", prefix, value);
218
219                new_scope.declare_namespace(prefix.to_string(), value.clone());
220                self.detected_namespaces
221                    .insert(prefix.to_string(), value.clone());
222                _has_namespace_declarations = true;
223
224                // Track namespace aliases
225                self.namespace_aliases
226                    .entry(value.clone())
227                    .or_default()
228                    .push(prefix.to_string());
229
230                // Try to detect ERN version
231                if let Some(version) = self.registry.detect_version(&value) {
232                    if self.detected_version.is_none() {
233                        self.detected_version = Some(version);
234                        debug!(
235                            "Detected ERN version: {:?} from namespace: {}",
236                            version, value
237                        );
238                    }
239                }
240            }
241        }
242
243        // Push new scope and default namespace
244        self.scope_stack.push(new_scope);
245        self.default_namespace_stack.push(new_default_namespace);
246
247        Ok(())
248    }
249
250    /// Pop namespace scope when closing an element
251    fn pop_namespace_scope(&mut self) {
252        if self.scope_stack.len() > 1 {
253            self.scope_stack.pop();
254        }
255        if self.default_namespace_stack.len() > 1 {
256            self.default_namespace_stack.pop();
257        }
258    }
259
260    /// Validate detected namespaces against known standards
261    fn validate_namespaces(&mut self) {
262        let validation_warnings = self
263            .registry
264            .validate_declarations(&self.detected_namespaces);
265        self.warnings.extend(validation_warnings);
266    }
267
268    /// Build the final detection result
269    fn build_result(&self) -> NamespaceDetectionResult {
270        // Identify custom namespaces
271        let mut custom_namespaces = Vec::new();
272        for (prefix, uri) in &self.detected_namespaces {
273            if self.registry.get_namespace_info(uri).is_none() {
274                // This is a custom namespace
275                let custom_info = NamespaceInfo {
276                    uri: uri.clone(),
277                    preferred_prefix: prefix.clone(),
278                    alternative_prefixes: self
279                        .namespace_aliases
280                        .get(uri)
281                        .cloned()
282                        .unwrap_or_default()
283                        .into_iter()
284                        .filter(|p| p != prefix)
285                        .collect(),
286                    standard: DDEXStandard::Custom("Unknown".to_string()),
287                    version: None,
288                    required: false,
289                };
290                custom_namespaces.push(custom_info);
291            }
292        }
293
294        NamespaceDetectionResult {
295            declarations: self.detected_namespaces.clone(),
296            version: self.detected_version,
297            root_scope: self.scope_stack.first().cloned().unwrap_or_default(),
298            warnings: self.warnings.clone(),
299            default_namespace: self.detected_namespaces.get("").cloned(),
300            custom_namespaces,
301        }
302    }
303
304    /// Get current namespace scope
305    pub fn current_scope(&self) -> &NamespaceScope {
306        self.scope_stack.last().unwrap()
307    }
308
309    /// Resolve a prefix to its URI in current scope
310    pub fn resolve_prefix(&self, prefix: &str) -> Option<String> {
311        self.current_scope().resolve_prefix(prefix)
312    }
313
314    /// Get default namespace in current scope
315    pub fn get_default_namespace(&self) -> Option<&String> {
316        self.default_namespace_stack.last().unwrap().as_ref()
317    }
318
319    /// Check if a namespace is declared in current scope
320    pub fn is_namespace_declared(&self, uri: &str) -> bool {
321        self.current_scope().is_namespace_declared(uri)
322    }
323
324    /// Find prefix for a namespace URI in current scope
325    pub fn find_prefix_for_uri(&self, uri: &str) -> Option<String> {
326        self.current_scope().find_prefix_for_uri(uri)
327    }
328
329    /// Add a warning
330    pub fn add_warning(&mut self, warning: NamespaceWarning) {
331        warn!("Namespace warning: {}", warning);
332        self.warnings.push(warning);
333    }
334
335    /// Get detected ERN version
336    pub fn get_detected_version(&self) -> Option<ERNVersion> {
337        self.detected_version
338    }
339
340    /// Get all detected namespace declarations
341    pub fn get_detected_namespaces(&self) -> &IndexMap<String, String> {
342        &self.detected_namespaces
343    }
344
345    /// Get namespace aliases
346    pub fn get_namespace_aliases(&self) -> &IndexMap<String, Vec<String>> {
347        &self.namespace_aliases
348    }
349}
350
/// Namespace context for maintaining state during parsing.
///
/// Usually built from a [`NamespaceDetectionResult`]; child contexts can be
/// derived for nested elements.
#[derive(Debug, Clone)]
pub struct NamespaceContext {
    /// Current namespace scope; this is where `declare_namespace` records
    /// new bindings.
    pub current_scope: NamespaceScope,
    /// Detected namespaces at document level (prefix -> URI, with the default
    /// namespace under the empty prefix).
    pub document_namespaces: IndexMap<String, String>,
    /// Current default namespace, applied to names that carry no prefix.
    pub default_namespace: Option<String>,
    /// Detected ERN version
    pub ern_version: Option<ERNVersion>,
}
363
364impl NamespaceContext {
365    /// Create new namespace context from detection result
366    pub fn from_detection_result(result: NamespaceDetectionResult) -> Self {
367        Self {
368            current_scope: result.root_scope,
369            document_namespaces: result.declarations,
370            default_namespace: result.default_namespace,
371            ern_version: result.version,
372        }
373    }
374
375    /// Create a new child context for nested elements
376    pub fn create_child(&self) -> Self {
377        Self {
378            current_scope: self.current_scope.new_child(),
379            document_namespaces: self.document_namespaces.clone(),
380            default_namespace: self.default_namespace.clone(),
381            ern_version: self.ern_version,
382        }
383    }
384
385    /// Declare a namespace in current scope
386    pub fn declare_namespace(&mut self, prefix: String, uri: String) {
387        self.current_scope.declare_namespace(prefix, uri);
388    }
389
390    /// Resolve element name with namespace
391    pub fn resolve_element_name(&self, local_name: &str, prefix: Option<&str>) -> ResolvedName {
392        match prefix {
393            Some(p) => {
394                if let Some(uri) = self.document_namespaces.get(p) {
395                    ResolvedName::Qualified {
396                        local_name: local_name.to_string(),
397                        namespace_uri: uri.clone(),
398                        prefix: p.to_string(),
399                    }
400                } else {
401                    ResolvedName::Unresolved {
402                        local_name: local_name.to_string(),
403                        prefix: Some(p.to_string()),
404                    }
405                }
406            }
407            None => {
408                // Use default namespace if available
409                if let Some(uri) = &self.default_namespace {
410                    ResolvedName::Qualified {
411                        local_name: local_name.to_string(),
412                        namespace_uri: uri.clone(),
413                        prefix: "".to_string(),
414                    }
415                } else {
416                    ResolvedName::Unqualified {
417                        local_name: local_name.to_string(),
418                    }
419                }
420            }
421        }
422    }
423}
424
/// Resolved element or attribute name.
///
/// Implements `Eq` and `Hash` in addition to `PartialEq`, so resolved names can
/// be used as map keys or collected into sets.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ResolvedName {
    /// Fully qualified name with namespace
    Qualified {
        /// Name without any prefix.
        local_name: String,
        /// URI of the namespace the name belongs to.
        namespace_uri: String,
        /// Prefix the name was written with; empty when the default
        /// namespace was applied.
        prefix: String,
    },
    /// Unqualified name (no namespace)
    Unqualified {
        /// Name without any prefix.
        local_name: String,
    },
    /// Unresolved prefix
    Unresolved {
        /// Name without any prefix.
        local_name: String,
        /// The prefix that could not be mapped to a namespace URI.
        prefix: Option<String>,
    },
}
442
443impl Default for NamespaceDetector {
444    fn default() -> Self {
445        Self::new()
446    }
447}
448
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Run a fresh detector over `xml`, panicking if detection fails.
    fn detect(xml: &str) -> NamespaceDetectionResult {
        let mut detector = NamespaceDetector::new();
        detector
            .detect_from_xml(Cursor::new(xml.as_bytes()))
            .unwrap()
    }

    /// Prefixed ERN 4.3 declaration: version and all three prefixes are found.
    #[test]
    fn test_namespace_detection_ern_43() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43" 
                       xmlns:avs="http://ddex.net/xml/avs"
                       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <ern:MessageHeader>
        <ern:MessageId>MSG001</ern:MessageId>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#;

        let detection = detect(xml);

        assert_eq!(detection.version, Some(ERNVersion::V4_3));
        for prefix in ["ern", "avs", "xsi"] {
            assert!(detection.declarations.contains_key(prefix));
        }
        assert_eq!(
            detection.declarations.get("ern").map(String::as_str),
            Some("http://ddex.net/xml/ern/43")
        );
    }

    /// Default-namespace ERN 4.2 declaration: version and default URI are found.
    #[test]
    fn test_default_namespace_detection() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<NewReleaseMessage xmlns="http://ddex.net/xml/ern/42" 
                   xmlns:avs="http://ddex.net/xml/avs">
    <MessageHeader>
        <MessageId>MSG001</MessageId>
    </MessageHeader>
</NewReleaseMessage>"#;

        let detection = detect(xml);

        assert_eq!(detection.version, Some(ERNVersion::V4_2));
        assert_eq!(
            detection.default_namespace.as_deref(),
            Some("http://ddex.net/xml/ern/42")
        );
        assert!(detection.declarations.contains_key(""));
    }

    /// A URI unknown to the registry is reported as a custom namespace.
    #[test]
    fn test_custom_namespace_detection() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43" 
                       xmlns:custom="http://example.com/custom">
    <ern:MessageHeader>
        <custom:CustomElement>Test</custom:CustomElement>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#;

        let detection = detect(xml);

        assert_eq!(detection.custom_namespaces.len(), 1);
        let custom = &detection.custom_namespaces[0];
        assert_eq!(custom.uri, "http://example.com/custom");
        assert_eq!(custom.preferred_prefix, "custom");
    }

    /// Declarations on nested elements are collected alongside root ones.
    #[test]
    fn test_namespace_scope_inheritance() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
    <ern:MessageHeader xmlns:local="http://example.com/local">
        <local:LocalElement>
            <ern:ErnElement />
        </local:LocalElement>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#;

        let detection = detect(xml);

        // Both namespaces should be detected
        for prefix in ["ern", "local"] {
            assert!(detection.declarations.contains_key(prefix));
        }
    }

    /// A context built from a detection result resolves a declared prefix.
    #[test]
    fn test_namespace_context() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43" 
                       xmlns:avs="http://ddex.net/xml/avs">
</ern:NewReleaseMessage>"#;

        let context = NamespaceContext::from_detection_result(detect(xml));

        match context.resolve_element_name("MessageHeader", Some("ern")) {
            ResolvedName::Qualified {
                local_name,
                namespace_uri,
                prefix,
            } => {
                assert_eq!(local_name, "MessageHeader");
                assert_eq!(namespace_uri, "http://ddex.net/xml/ern/43");
                assert_eq!(prefix, "ern");
            }
            _ => panic!("Expected qualified name"),
        }
    }
}