// ddex_parser/parser/namespace_detector.rs

//! # Namespace Detection and Management for DDEX Parser
//!
//! This module provides comprehensive namespace detection and storage for DDEX XML parsing,
//! handling scope inheritance, default namespaces, and custom extensions.
5
6use ddex_core::namespace::{NamespaceRegistry, NamespaceScope, NamespaceInfo, DDEXStandard, NamespaceWarning};
7use ddex_core::models::versions::ERNVersion;
8use indexmap::IndexMap;
9use quick_xml::events::{Event, BytesStart};
10use quick_xml::Reader;
11use std::io::BufRead;
12use crate::error::ParseError;
13use tracing::{debug, warn};
14
15/// Comprehensive namespace detection and management
16#[derive(Debug, Clone)]
17pub struct NamespaceDetector {
18    /// Registry of known namespaces
19    registry: NamespaceRegistry,
20    /// Stack of namespace scopes for element-level inheritance
21    scope_stack: Vec<NamespaceScope>,
22    /// All detected namespace declarations in document order
23    detected_namespaces: IndexMap<String, String>, // prefix -> uri
24    /// Namespace aliases found in the document
25    namespace_aliases: IndexMap<String, Vec<String>>, // uri -> [prefixes]
26    /// Default namespace stack
27    default_namespace_stack: Vec<Option<String>>,
28    /// Detected ERN version from namespaces
29    detected_version: Option<ERNVersion>,
30    /// Warnings collected during namespace processing
31    warnings: Vec<NamespaceWarning>,
32}
33
34/// Namespace detection result
35#[derive(Debug, Clone)]
36pub struct NamespaceDetectionResult {
37    /// All namespace declarations found
38    pub declarations: IndexMap<String, String>,
39    /// Detected ERN version
40    pub version: Option<ERNVersion>,
41    /// Namespace scope tree
42    pub root_scope: NamespaceScope,
43    /// Warnings about namespace usage
44    pub warnings: Vec<NamespaceWarning>,
45    /// Default namespace at root level
46    pub default_namespace: Option<String>,
47    /// Custom namespaces detected
48    pub custom_namespaces: Vec<NamespaceInfo>,
49}
50
51impl NamespaceDetector {
52    /// Create new namespace detector
53    pub fn new() -> Self {
54        Self {
55            registry: NamespaceRegistry::new(),
56            scope_stack: vec![NamespaceScope::new()],
57            detected_namespaces: IndexMap::new(),
58            namespace_aliases: IndexMap::new(),
59            default_namespace_stack: vec![None],
60            detected_version: None,
61            warnings: Vec::new(),
62        }
63    }
64
65    /// Detect namespaces from XML content
66    pub fn detect_from_xml<R: BufRead>(&mut self, reader: R) -> Result<NamespaceDetectionResult, ParseError> {
67        let mut xml_reader = Reader::from_reader(reader);
68        xml_reader.config_mut().trim_text(true);
69        
70        let mut buf = Vec::new();
71        
72        loop {
73            match xml_reader.read_event_into(&mut buf) {
74                Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
75                    self.process_start_element(e)?;
76                }
77                Ok(Event::End(_)) => {
78                    self.pop_namespace_scope();
79                }
80                Ok(Event::Eof) => break,
81                Ok(_) => {} // Ignore other events for namespace detection
82                Err(e) => return Err(ParseError::XmlError { 
83                    message: format!("XML parsing error: {}", e),
84                    location: crate::error::ErrorLocation::default(),
85                }),
86            }
87            buf.clear();
88        }
89
90        // Validate detected namespaces
91        self.validate_namespaces();
92        
93        Ok(self.build_result())
94    }
95
96    /// Process a start element for namespace declarations
97    fn process_start_element(&mut self, element: &BytesStart) -> Result<(), ParseError> {
98        // Create new scope for this element
99        let current_scope = self.scope_stack.last().unwrap().clone();
100        let mut new_scope = current_scope.new_child();
101        
102        // Extract namespace declarations from attributes
103        let mut has_namespace_declarations = false;
104        let mut new_default_namespace = self.default_namespace_stack.last().cloned().unwrap_or(None);
105        
106        for attr_result in element.attributes() {
107            let attr = attr_result.map_err(|e| ParseError::XmlError { 
108                message: format!("Attribute error: {}", e),
109                location: crate::error::ErrorLocation::default(),
110            })?;
111            let key = String::from_utf8_lossy(attr.key.as_ref());
112            let value = String::from_utf8_lossy(&attr.value).to_string();
113            
114            if key == "xmlns" {
115                // Default namespace declaration
116                debug!("Found default namespace declaration: {}", value);
117                new_default_namespace = Some(value.clone());
118                new_scope.declare_namespace("".to_string(), value.clone());
119                self.detected_namespaces.insert("".to_string(), value.clone());
120                has_namespace_declarations = true;
121                
122                // Try to detect ERN version
123                if let Some(version) = self.registry.detect_version(&value) {
124                    if self.detected_version.is_none() {
125                        self.detected_version = Some(version);
126                        debug!("Detected ERN version: {:?} from namespace: {}", version, value);
127                    }
128                }
129            } else if key.starts_with("xmlns:") {
130                // Prefixed namespace declaration
131                let prefix = key.strip_prefix("xmlns:").unwrap_or("");
132                debug!("Found namespace declaration: {}={}", prefix, value);
133                
134                new_scope.declare_namespace(prefix.to_string(), value.clone());
135                self.detected_namespaces.insert(prefix.to_string(), value.clone());
136                has_namespace_declarations = true;
137                
138                // Track namespace aliases
139                self.namespace_aliases
140                    .entry(value.clone())
141                    .or_insert_with(Vec::new)
142                    .push(prefix.to_string());
143                
144                // Try to detect ERN version
145                if let Some(version) = self.registry.detect_version(&value) {
146                    if self.detected_version.is_none() {
147                        self.detected_version = Some(version);
148                        debug!("Detected ERN version: {:?} from namespace: {}", version, value);
149                    }
150                }
151            }
152        }
153        
154        // Push new scope and default namespace
155        self.scope_stack.push(new_scope);
156        self.default_namespace_stack.push(new_default_namespace);
157        
158        Ok(())
159    }
160
161    /// Pop namespace scope when closing an element
162    fn pop_namespace_scope(&mut self) {
163        if self.scope_stack.len() > 1 {
164            self.scope_stack.pop();
165        }
166        if self.default_namespace_stack.len() > 1 {
167            self.default_namespace_stack.pop();
168        }
169    }
170
171    /// Validate detected namespaces against known standards
172    fn validate_namespaces(&mut self) {
173        let validation_warnings = self.registry.validate_declarations(&self.detected_namespaces);
174        self.warnings.extend(validation_warnings);
175    }
176
177    /// Build the final detection result
178    fn build_result(&self) -> NamespaceDetectionResult {
179        // Identify custom namespaces
180        let mut custom_namespaces = Vec::new();
181        for (prefix, uri) in &self.detected_namespaces {
182            if self.registry.get_namespace_info(uri).is_none() {
183                // This is a custom namespace
184                let custom_info = NamespaceInfo {
185                    uri: uri.clone(),
186                    preferred_prefix: prefix.clone(),
187                    alternative_prefixes: self.namespace_aliases.get(uri)
188                        .cloned()
189                        .unwrap_or_default()
190                        .into_iter()
191                        .filter(|p| p != prefix)
192                        .collect(),
193                    standard: DDEXStandard::Custom("Unknown".to_string()),
194                    version: None,
195                    required: false,
196                };
197                custom_namespaces.push(custom_info);
198            }
199        }
200
201        NamespaceDetectionResult {
202            declarations: self.detected_namespaces.clone(),
203            version: self.detected_version,
204            root_scope: self.scope_stack.first().cloned().unwrap_or_default(),
205            warnings: self.warnings.clone(),
206            default_namespace: self.detected_namespaces.get("").cloned(),
207            custom_namespaces,
208        }
209    }
210
211    /// Get current namespace scope
212    pub fn current_scope(&self) -> &NamespaceScope {
213        self.scope_stack.last().unwrap()
214    }
215
216    /// Resolve a prefix to its URI in current scope
217    pub fn resolve_prefix(&self, prefix: &str) -> Option<String> {
218        self.current_scope().resolve_prefix(prefix)
219    }
220
221    /// Get default namespace in current scope
222    pub fn get_default_namespace(&self) -> Option<&String> {
223        self.default_namespace_stack.last().unwrap().as_ref()
224    }
225
226    /// Check if a namespace is declared in current scope
227    pub fn is_namespace_declared(&self, uri: &str) -> bool {
228        self.current_scope().is_namespace_declared(uri)
229    }
230
231    /// Find prefix for a namespace URI in current scope
232    pub fn find_prefix_for_uri(&self, uri: &str) -> Option<String> {
233        self.current_scope().find_prefix_for_uri(uri)
234    }
235
236    /// Add a warning
237    pub fn add_warning(&mut self, warning: NamespaceWarning) {
238        warn!("Namespace warning: {}", warning);
239        self.warnings.push(warning);
240    }
241
242    /// Get detected ERN version
243    pub fn get_detected_version(&self) -> Option<ERNVersion> {
244        self.detected_version
245    }
246
247    /// Get all detected namespace declarations
248    pub fn get_detected_namespaces(&self) -> &IndexMap<String, String> {
249        &self.detected_namespaces
250    }
251
252    /// Get namespace aliases
253    pub fn get_namespace_aliases(&self) -> &IndexMap<String, Vec<String>> {
254        &self.namespace_aliases
255    }
256}
257
258/// Namespace context for maintaining state during parsing
259#[derive(Debug, Clone)]
260pub struct NamespaceContext {
261    /// Current namespace scope
262    pub current_scope: NamespaceScope,
263    /// Detected namespaces at document level
264    pub document_namespaces: IndexMap<String, String>,
265    /// Current default namespace
266    pub default_namespace: Option<String>,
267    /// Detected ERN version
268    pub ern_version: Option<ERNVersion>,
269}
270
271impl NamespaceContext {
272    /// Create new namespace context from detection result
273    pub fn from_detection_result(result: NamespaceDetectionResult) -> Self {
274        Self {
275            current_scope: result.root_scope,
276            document_namespaces: result.declarations,
277            default_namespace: result.default_namespace,
278            ern_version: result.version,
279        }
280    }
281
282    /// Create a new child context for nested elements
283    pub fn create_child(&self) -> Self {
284        Self {
285            current_scope: self.current_scope.new_child(),
286            document_namespaces: self.document_namespaces.clone(),
287            default_namespace: self.default_namespace.clone(),
288            ern_version: self.ern_version,
289        }
290    }
291
292    /// Declare a namespace in current scope
293    pub fn declare_namespace(&mut self, prefix: String, uri: String) {
294        self.current_scope.declare_namespace(prefix, uri);
295    }
296
297    /// Resolve element name with namespace
298    pub fn resolve_element_name(&self, local_name: &str, prefix: Option<&str>) -> ResolvedName {
299        match prefix {
300            Some(p) => {
301                if let Some(uri) = self.document_namespaces.get(p) {
302                    ResolvedName::Qualified {
303                        local_name: local_name.to_string(),
304                        namespace_uri: uri.clone(),
305                        prefix: p.to_string(),
306                    }
307                } else {
308                    ResolvedName::Unresolved {
309                        local_name: local_name.to_string(),
310                        prefix: Some(p.to_string()),
311                    }
312                }
313            }
314            None => {
315                // Use default namespace if available
316                if let Some(uri) = &self.default_namespace {
317                    ResolvedName::Qualified {
318                        local_name: local_name.to_string(),
319                        namespace_uri: uri.clone(),
320                        prefix: "".to_string(),
321                    }
322                } else {
323                    ResolvedName::Unqualified {
324                        local_name: local_name.to_string(),
325                    }
326                }
327            }
328        }
329    }
330}
331
/// Outcome of resolving an element or attribute name against namespace bindings.
///
/// All payloads are plain strings, so the type supports full equality and
/// hashing and can be used as a key in sets and maps.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ResolvedName {
    /// Name bound to a namespace URI.
    Qualified {
        /// Name without its prefix.
        local_name: String,
        /// Namespace URI the name belongs to.
        namespace_uri: String,
        /// Prefix the name was written with; empty for the default namespace.
        prefix: String,
    },
    /// Name with no prefix and no default namespace in effect.
    Unqualified {
        /// Name as written.
        local_name: String,
    },
    /// Name whose prefix has no known binding.
    Unresolved {
        /// Name without its prefix.
        local_name: String,
        /// The prefix that could not be resolved.
        prefix: Option<String>,
    },
}
351
352impl Default for NamespaceDetector {
353    fn default() -> Self {
354        Self::new()
355    }
356}
357
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Runs a fresh detector over `xml` and returns what it found.
    fn detect(xml: &str) -> NamespaceDetectionResult {
        let mut detector = NamespaceDetector::new();
        detector
            .detect_from_xml(Cursor::new(xml.as_bytes()))
            .unwrap()
    }

    /// Prefixed ERN 4.3 document: version and all three prefixes are reported.
    #[test]
    fn test_namespace_detection_ern_43() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43" 
                       xmlns:avs="http://ddex.net/xml/avs"
                       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <ern:MessageHeader>
        <ern:MessageId>MSG001</ern:MessageId>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#,
        );

        assert_eq!(result.version, Some(ERNVersion::V4_3));
        for prefix in ["ern", "avs", "xsi"].iter() {
            assert!(result.declarations.contains_key(*prefix));
        }
        assert_eq!(
            result.declarations.get("ern").map(String::as_str),
            Some("http://ddex.net/xml/ern/43")
        );
    }

    /// ERN 4.2 declared as the default namespace.
    #[test]
    fn test_default_namespace_detection() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<NewReleaseMessage xmlns="http://ddex.net/xml/ern/42" 
                   xmlns:avs="http://ddex.net/xml/avs">
    <MessageHeader>
        <MessageId>MSG001</MessageId>
    </MessageHeader>
</NewReleaseMessage>"#,
        );

        assert_eq!(result.version, Some(ERNVersion::V4_2));
        assert_eq!(
            result.default_namespace.as_deref(),
            Some("http://ddex.net/xml/ern/42")
        );
        assert!(result.declarations.contains_key(""));
    }

    /// A namespace unknown to the registry is reported as custom.
    #[test]
    fn test_custom_namespace_detection() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43" 
                       xmlns:custom="http://example.com/custom">
    <ern:MessageHeader>
        <custom:CustomElement>Test</custom:CustomElement>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#,
        );

        assert_eq!(result.custom_namespaces.len(), 1);
        let custom = &result.custom_namespaces[0];
        assert_eq!(custom.uri, "http://example.com/custom");
        assert_eq!(custom.preferred_prefix, "custom");
    }

    /// Declarations on nested elements are collected alongside root-level ones.
    #[test]
    fn test_namespace_scope_inheritance() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
    <ern:MessageHeader xmlns:local="http://example.com/local">
        <local:LocalElement>
            <ern:ErnElement />
        </local:LocalElement>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#,
        );

        // Both namespaces should be detected
        assert!(result.declarations.contains_key("ern"));
        assert!(result.declarations.contains_key("local"));
    }

    /// A context built from a detection result resolves a declared prefix.
    #[test]
    fn test_namespace_context() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43" 
                       xmlns:avs="http://ddex.net/xml/avs">
</ern:NewReleaseMessage>"#,
        );

        let context = NamespaceContext::from_detection_result(result);
        let resolved = context.resolve_element_name("MessageHeader", Some("ern"));

        assert_eq!(
            resolved,
            ResolvedName::Qualified {
                local_name: "MessageHeader".to_string(),
                namespace_uri: "http://ddex.net/xml/ern/43".to_string(),
                prefix: "ern".to_string(),
            },
            "Expected qualified name"
        );
    }
}