ddex_parser/parser/
namespace_detector.rs

1//! # Namespace Detection and Management for DDEX Parser
2//!
3//! This module provides comprehensive namespace detection and storage for DDEX XML parsing,
4//! handling scope inheritance, default namespaces, and custom extensions.
5
6use crate::error::ParseError;
7use crate::utf8_utils;
8use ddex_core::models::versions::ERNVersion;
9use ddex_core::namespace::{
10    DDEXStandard, NamespaceInfo, NamespaceRegistry, NamespaceScope, NamespaceWarning,
11};
12use indexmap::IndexMap;
13use quick_xml::events::{BytesStart, Event};
14use quick_xml::Reader;
15use std::io::BufRead;
16use tracing::{debug, warn};
17
/// Comprehensive namespace detection and management.
///
/// The detector streams an XML document, records every `xmlns` / `xmlns:*`
/// declaration it encounters and keeps a per-element stack of scopes while
/// the walk is in progress.
#[derive(Debug, Clone)]
pub struct NamespaceDetector {
    /// Registry of known namespaces; consulted for version detection,
    /// declaration validation and for telling custom namespaces apart.
    registry: NamespaceRegistry,
    /// Stack of namespace scopes for element-level inheritance.
    /// Starts with a single root scope; one child scope is pushed per opened
    /// element and popped again when that element closes.
    scope_stack: Vec<NamespaceScope>,
    /// All detected namespace declarations in document order.
    /// The default namespace is stored under the empty prefix `""`.
    detected_namespaces: IndexMap<String, String>, // prefix -> uri
    /// Namespace aliases found in the document (only prefixed declarations
    /// are recorded here).
    namespace_aliases: IndexMap<String, Vec<String>>, // uri -> [prefixes]
    /// Default namespace stack, kept in lock-step with `scope_stack`;
    /// `None` means no default namespace is in effect at that level.
    default_namespace_stack: Vec<Option<String>>,
    /// Detected ERN version from namespaces; the first recognised namespace
    /// wins and later ones do not overwrite it.
    detected_version: Option<ERNVersion>,
    /// Warnings collected during namespace processing
    warnings: Vec<NamespaceWarning>,
}
36
/// Namespace detection result.
///
/// A snapshot of what a [`NamespaceDetector`] has collected once the end of
/// the document has been reached.
#[derive(Debug, Clone)]
pub struct NamespaceDetectionResult {
    /// All namespace declarations found (prefix -> URI); the default
    /// namespace is keyed by the empty prefix `""`.
    pub declarations: IndexMap<String, String>,
    /// Detected ERN version, if any declared namespace was recognised as one.
    pub version: Option<ERNVersion>,
    /// Namespace scope tree; this is the bottom entry of the detector's scope
    /// stack.
    pub root_scope: NamespaceScope,
    /// Warnings about namespace usage
    pub warnings: Vec<NamespaceWarning>,
    /// Default namespace: the URI recorded under the empty prefix in
    /// `declarations`, if there is one.
    pub default_namespace: Option<String>,
    /// Custom namespaces detected, i.e. declared URIs for which the registry
    /// holds no information.
    pub custom_namespaces: Vec<NamespaceInfo>,
}
53
54impl NamespaceDetector {
55    /// Create new namespace detector
56    pub fn new() -> Self {
57        Self {
58            registry: NamespaceRegistry::new(),
59            scope_stack: vec![NamespaceScope::new()],
60            detected_namespaces: IndexMap::new(),
61            namespace_aliases: IndexMap::new(),
62            default_namespace_stack: vec![None],
63            detected_version: None,
64            warnings: Vec::new(),
65        }
66    }
67
68    /// Detect namespaces from XML content with security limits
69    pub fn detect_from_xml<R: BufRead>(
70        &mut self,
71        reader: R,
72    ) -> Result<NamespaceDetectionResult, ParseError> {
73        self.detect_from_xml_with_security(
74            reader,
75            &crate::parser::security::SecurityConfig::default(),
76        )
77    }
78
79    /// Detect namespaces from XML content with custom security config
80    pub fn detect_from_xml_with_security<R: BufRead>(
81        &mut self,
82        reader: R,
83        security_config: &crate::parser::security::SecurityConfig,
84    ) -> Result<NamespaceDetectionResult, ParseError> {
85        let mut xml_reader = Reader::from_reader(reader);
86        xml_reader.config_mut().trim_text(true);
87
88        // Configure security settings
89        xml_reader.config_mut().expand_empty_elements = false;
90        if security_config.disable_dtd {
91            // Note: quick_xml doesn't have a direct DTD disable, but we check for DTDs manually
92        }
93
94        let mut buf = Vec::new();
95        let mut depth = 0;
96        let mut entity_expansions = 0;
97
98        loop {
99            match xml_reader.read_event_into(&mut buf) {
100                Ok(Event::Start(ref e)) => {
101                    depth += 1;
102
103                    // Check maximum nesting depth
104                    if depth > security_config.max_element_depth {
105                        return Err(ParseError::DepthLimitExceeded {
106                            depth,
107                            limit: security_config.max_element_depth,
108                        });
109                    }
110
111                    self.process_start_element(e)?;
112                }
113                Ok(Event::Empty(ref e)) => {
114                    depth += 1;
115
116                    // Check maximum nesting depth
117                    if depth > security_config.max_element_depth {
118                        return Err(ParseError::DepthLimitExceeded {
119                            depth,
120                            limit: security_config.max_element_depth,
121                        });
122                    }
123
124                    self.process_start_element(e)?;
125
126                    // For empty elements, immediately pop scope and decrement depth
127                    self.pop_namespace_scope();
128                    depth -= 1;
129                }
130                Ok(Event::End(_)) => {
131                    self.pop_namespace_scope();
132                    depth = depth.saturating_sub(1);
133                }
134                Ok(Event::Text(ref e)) => {
135                    // Use proper UTF-8 decoding for text content
136                    let current_pos = xml_reader.buffer_position() as usize;
137                    let text = utf8_utils::decode_utf8_at_position(e, current_pos)?;
138
139                    // Check for potential entity expansions (simple heuristic)
140                    if text.contains("&") {
141                        entity_expansions += text.matches("&").count();
142                        if entity_expansions > security_config.max_entity_expansions {
143                            return Err(ParseError::SecurityViolation {
144                                message: format!(
145                                    "Entity expansions {} exceed maximum allowed {}",
146                                    entity_expansions, security_config.max_entity_expansions
147                                ),
148                            });
149                        }
150                    }
151                }
152                Ok(Event::DocType(_)) if security_config.disable_dtd => {
153                    return Err(ParseError::SecurityViolation {
154                        message: "DTD declarations are disabled for security".to_string(),
155                    });
156                }
157                Ok(Event::Eof) => break,
158                Ok(_) => {} // Ignore other events for namespace detection
159                Err(e) => {
160                    return Err(ParseError::XmlError(format!("XML parsing error: {}", e)));
161                }
162            }
163            buf.clear();
164        }
165
166        // Validate detected namespaces
167        self.validate_namespaces();
168
169        Ok(self.build_result())
170    }
171
172    /// Process a start element for namespace declarations
173    fn process_start_element(&mut self, element: &BytesStart) -> Result<(), ParseError> {
174        // Create new scope for this element
175        let current_scope = self.scope_stack.last().unwrap().clone();
176        let mut new_scope = current_scope.new_child();
177
178        // Extract namespace declarations from attributes
179        let mut _has_namespace_declarations = false;
180        let mut new_default_namespace =
181            self.default_namespace_stack.last().cloned().unwrap_or(None);
182
183        for attr_result in element.attributes() {
184            let attr = attr_result.map_err(|e| ParseError::XmlError(format!("Attribute error: {}", e)))?;
185            // Use proper UTF-8 decoding for attribute key and value
186            let key = utf8_utils::decode_attribute_name(attr.key.as_ref(), 0)?;
187            let value = utf8_utils::decode_attribute_value(&attr.value, 0)?;
188
189            if key == "xmlns" {
190                // Default namespace declaration
191                debug!("Found default namespace declaration: {}", value);
192                new_default_namespace = Some(value.clone());
193                new_scope.declare_namespace("".to_string(), value.clone());
194                self.detected_namespaces
195                    .insert("".to_string(), value.clone());
196                _has_namespace_declarations = true;
197
198                // Try to detect ERN version
199                if let Some(version) = self.registry.detect_version(&value) {
200                    if self.detected_version.is_none() {
201                        self.detected_version = Some(version);
202                        debug!(
203                            "Detected ERN version: {:?} from namespace: {}",
204                            version, value
205                        );
206                    }
207                }
208            } else if key.starts_with("xmlns:") {
209                // Prefixed namespace declaration
210                let prefix = key.strip_prefix("xmlns:").unwrap_or("");
211                debug!("Found namespace declaration: {}={}", prefix, value);
212
213                new_scope.declare_namespace(prefix.to_string(), value.clone());
214                self.detected_namespaces
215                    .insert(prefix.to_string(), value.clone());
216                _has_namespace_declarations = true;
217
218                // Track namespace aliases
219                self.namespace_aliases
220                    .entry(value.clone())
221                    .or_default()
222                    .push(prefix.to_string());
223
224                // Try to detect ERN version
225                if let Some(version) = self.registry.detect_version(&value) {
226                    if self.detected_version.is_none() {
227                        self.detected_version = Some(version);
228                        debug!(
229                            "Detected ERN version: {:?} from namespace: {}",
230                            version, value
231                        );
232                    }
233                }
234            }
235        }
236
237        // Push new scope and default namespace
238        self.scope_stack.push(new_scope);
239        self.default_namespace_stack.push(new_default_namespace);
240
241        Ok(())
242    }
243
244    /// Pop namespace scope when closing an element
245    fn pop_namespace_scope(&mut self) {
246        if self.scope_stack.len() > 1 {
247            self.scope_stack.pop();
248        }
249        if self.default_namespace_stack.len() > 1 {
250            self.default_namespace_stack.pop();
251        }
252    }
253
254    /// Validate detected namespaces against known standards
255    fn validate_namespaces(&mut self) {
256        let validation_warnings = self
257            .registry
258            .validate_declarations(&self.detected_namespaces);
259        self.warnings.extend(validation_warnings);
260    }
261
262    /// Build the final detection result
263    fn build_result(&self) -> NamespaceDetectionResult {
264        // Identify custom namespaces
265        let mut custom_namespaces = Vec::new();
266        for (prefix, uri) in &self.detected_namespaces {
267            if self.registry.get_namespace_info(uri).is_none() {
268                // This is a custom namespace
269                let custom_info = NamespaceInfo {
270                    uri: uri.clone(),
271                    preferred_prefix: prefix.clone(),
272                    alternative_prefixes: self
273                        .namespace_aliases
274                        .get(uri)
275                        .cloned()
276                        .unwrap_or_default()
277                        .into_iter()
278                        .filter(|p| p != prefix)
279                        .collect(),
280                    standard: DDEXStandard::Custom("Unknown".to_string()),
281                    version: None,
282                    required: false,
283                };
284                custom_namespaces.push(custom_info);
285            }
286        }
287
288        NamespaceDetectionResult {
289            declarations: self.detected_namespaces.clone(),
290            version: self.detected_version,
291            root_scope: self.scope_stack.first().cloned().unwrap_or_default(),
292            warnings: self.warnings.clone(),
293            default_namespace: self.detected_namespaces.get("").cloned(),
294            custom_namespaces,
295        }
296    }
297
298    /// Get current namespace scope
299    pub fn current_scope(&self) -> &NamespaceScope {
300        self.scope_stack.last().unwrap()
301    }
302
303    /// Resolve a prefix to its URI in current scope
304    pub fn resolve_prefix(&self, prefix: &str) -> Option<String> {
305        self.current_scope().resolve_prefix(prefix)
306    }
307
308    /// Get default namespace in current scope
309    pub fn get_default_namespace(&self) -> Option<&String> {
310        self.default_namespace_stack.last().unwrap().as_ref()
311    }
312
313    /// Check if a namespace is declared in current scope
314    pub fn is_namespace_declared(&self, uri: &str) -> bool {
315        self.current_scope().is_namespace_declared(uri)
316    }
317
318    /// Find prefix for a namespace URI in current scope
319    pub fn find_prefix_for_uri(&self, uri: &str) -> Option<String> {
320        self.current_scope().find_prefix_for_uri(uri)
321    }
322
323    /// Add a warning
324    pub fn add_warning(&mut self, warning: NamespaceWarning) {
325        warn!("Namespace warning: {}", warning);
326        self.warnings.push(warning);
327    }
328
329    /// Get detected ERN version
330    pub fn get_detected_version(&self) -> Option<ERNVersion> {
331        self.detected_version
332    }
333
334    /// Get all detected namespace declarations
335    pub fn get_detected_namespaces(&self) -> &IndexMap<String, String> {
336        &self.detected_namespaces
337    }
338
339    /// Get namespace aliases
340    pub fn get_namespace_aliases(&self) -> &IndexMap<String, Vec<String>> {
341        &self.namespace_aliases
342    }
343}
344
/// Namespace context for maintaining state during parsing.
///
/// Usually created from a [`NamespaceDetectionResult`]; child contexts can be
/// derived for nested elements.
#[derive(Debug, Clone)]
pub struct NamespaceContext {
    /// Current namespace scope; namespaces declared through the context are
    /// stored here.
    pub current_scope: NamespaceScope,
    /// Detected namespaces at document level (prefix -> URI)
    pub document_namespaces: IndexMap<String, String>,
    /// Current default namespace, applied to names that carry no prefix
    pub default_namespace: Option<String>,
    /// Detected ERN version
    pub ern_version: Option<ERNVersion>,
}
357
358impl NamespaceContext {
359    /// Create new namespace context from detection result
360    pub fn from_detection_result(result: NamespaceDetectionResult) -> Self {
361        Self {
362            current_scope: result.root_scope,
363            document_namespaces: result.declarations,
364            default_namespace: result.default_namespace,
365            ern_version: result.version,
366        }
367    }
368
369    /// Create a new child context for nested elements
370    pub fn create_child(&self) -> Self {
371        Self {
372            current_scope: self.current_scope.new_child(),
373            document_namespaces: self.document_namespaces.clone(),
374            default_namespace: self.default_namespace.clone(),
375            ern_version: self.ern_version,
376        }
377    }
378
379    /// Declare a namespace in current scope
380    pub fn declare_namespace(&mut self, prefix: String, uri: String) {
381        self.current_scope.declare_namespace(prefix, uri);
382    }
383
384    /// Resolve element name with namespace
385    pub fn resolve_element_name(&self, local_name: &str, prefix: Option<&str>) -> ResolvedName {
386        match prefix {
387            Some(p) => {
388                if let Some(uri) = self.document_namespaces.get(p) {
389                    ResolvedName::Qualified {
390                        local_name: local_name.to_string(),
391                        namespace_uri: uri.clone(),
392                        prefix: p.to_string(),
393                    }
394                } else {
395                    ResolvedName::Unresolved {
396                        local_name: local_name.to_string(),
397                        prefix: Some(p.to_string()),
398                    }
399                }
400            }
401            None => {
402                // Use default namespace if available
403                if let Some(uri) = &self.default_namespace {
404                    ResolvedName::Qualified {
405                        local_name: local_name.to_string(),
406                        namespace_uri: uri.clone(),
407                        prefix: "".to_string(),
408                    }
409                } else {
410                    ResolvedName::Unqualified {
411                        local_name: local_name.to_string(),
412                    }
413                }
414            }
415        }
416    }
417}
418
/// Resolved element or attribute name, as produced by
/// [`NamespaceContext::resolve_element_name`].
#[derive(Debug, Clone, PartialEq)]
pub enum ResolvedName {
    /// Fully qualified name with namespace.
    /// `prefix` is the empty string when the default namespace was applied.
    Qualified {
        local_name: String,
        namespace_uri: String,
        prefix: String,
    },
    /// Unqualified name (no prefix given and no default namespace available)
    Unqualified { local_name: String },
    /// Unresolved prefix: a prefix was given but no namespace URI is known
    /// for it.
    Unresolved {
        local_name: String,
        prefix: Option<String>,
    },
}
436
437impl Default for NamespaceDetector {
438    fn default() -> Self {
439        Self::new()
440    }
441}
442
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Runs a fresh detector over `xml` with the default security limits and
    /// returns its result; any detection error fails the calling test.
    fn detect(xml: &str) -> NamespaceDetectionResult {
        let input = Cursor::new(xml.as_bytes());
        NamespaceDetector::new().detect_from_xml(input).unwrap()
    }

    /// Prefixed ERN 4.3 root: version and all three prefixes are picked up.
    #[test]
    fn test_namespace_detection_ern_43() {
        let outcome = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43" 
                       xmlns:avs="http://ddex.net/xml/avs"
                       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <ern:MessageHeader>
        <ern:MessageId>MSG001</ern:MessageId>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#,
        );

        assert_eq!(outcome.version, Some(ERNVersion::V4_3));
        for prefix in ["ern", "avs", "xsi"] {
            assert!(outcome.declarations.contains_key(prefix));
        }
        assert_eq!(
            outcome.declarations.get("ern"),
            Some(&"http://ddex.net/xml/ern/43".to_string())
        );
    }

    /// Unprefixed ERN 4.2 root: the default namespace is reported and keyed
    /// by the empty prefix.
    #[test]
    fn test_default_namespace_detection() {
        let outcome = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<NewReleaseMessage xmlns="http://ddex.net/xml/ern/42" 
                   xmlns:avs="http://ddex.net/xml/avs">
    <MessageHeader>
        <MessageId>MSG001</MessageId>
    </MessageHeader>
</NewReleaseMessage>"#,
        );

        assert_eq!(outcome.version, Some(ERNVersion::V4_2));
        assert_eq!(
            outcome.default_namespace,
            Some("http://ddex.net/xml/ern/42".to_string())
        );
        assert!(outcome.declarations.contains_key(""));
    }

    /// A namespace unknown to the registry is listed as custom.
    #[test]
    fn test_custom_namespace_detection() {
        let outcome = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43" 
                       xmlns:custom="http://example.com/custom">
    <ern:MessageHeader>
        <custom:CustomElement>Test</custom:CustomElement>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#,
        );

        assert_eq!(outcome.custom_namespaces.len(), 1);
        let custom = &outcome.custom_namespaces[0];
        assert_eq!(custom.uri, "http://example.com/custom");
        assert_eq!(custom.preferred_prefix, "custom");
    }

    /// Declarations on nested elements are collected alongside root ones.
    #[test]
    fn test_namespace_scope_inheritance() {
        let outcome = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
    <ern:MessageHeader xmlns:local="http://example.com/local">
        <local:LocalElement>
            <ern:ErnElement />
        </local:LocalElement>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#,
        );

        // Both namespaces should be detected
        assert!(outcome.declarations.contains_key("ern"));
        assert!(outcome.declarations.contains_key("local"));
    }

    /// A context built from a detection result resolves a declared prefix to
    /// a fully qualified name.
    #[test]
    fn test_namespace_context() {
        let outcome = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43" 
                       xmlns:avs="http://ddex.net/xml/avs">
</ern:NewReleaseMessage>"#,
        );

        let context = NamespaceContext::from_detection_result(outcome);

        if let ResolvedName::Qualified {
            local_name,
            namespace_uri,
            prefix,
        } = context.resolve_element_name("MessageHeader", Some("ern"))
        {
            assert_eq!(local_name, "MessageHeader");
            assert_eq!(namespace_uri, "http://ddex.net/xml/ern/43");
            assert_eq!(prefix, "ern");
        } else {
            panic!("Expected qualified name");
        }
    }
}