1use crate::error::ParseError;
7use crate::utf8_utils;
8use ddex_core::models::versions::ERNVersion;
9use ddex_core::namespace::{
10 DDEXStandard, NamespaceInfo, NamespaceRegistry, NamespaceScope, NamespaceWarning,
11};
12use indexmap::IndexMap;
13use quick_xml::events::{BytesStart, Event};
14use quick_xml::Reader;
15use std::io::BufRead;
16use tracing::{debug, warn};
17
#[derive(Debug, Clone)]
/// Collects XML namespace declarations while streaming through a document and
/// derives the ERN version from them.
///
/// All state accumulates on the detector itself; nothing in this type clears it
/// between detection runs.
pub struct NamespaceDetector {
    /// Registry consulted for version detection, declaration validation and
    /// known-namespace lookup.
    registry: NamespaceRegistry,
    /// Namespace scopes of the currently open elements. Starts with one base
    /// scope; one child scope is pushed per processed element.
    scope_stack: Vec<NamespaceScope>,
    /// Prefix -> URI for every declaration seen so far. The default namespace is
    /// stored under the empty-string key; a later declaration of the same prefix
    /// overwrites the earlier one.
    detected_namespaces: IndexMap<String, String>,
    /// URI -> prefixes that were bound to it through `xmlns:prefix` attributes.
    namespace_aliases: IndexMap<String, Vec<String>>,
    /// Default namespace in effect for each open element, pushed and popped
    /// together with `scope_stack`.
    default_namespace_stack: Vec<Option<String>>,
    /// First ERN version recognised from a declared namespace URI, if any.
    detected_version: Option<ERNVersion>,
    /// Warnings produced by validation or added through `add_warning`.
    warnings: Vec<NamespaceWarning>,
}
36
#[derive(Debug, Clone)]
/// Snapshot of what a [`NamespaceDetector`] found in a document.
pub struct NamespaceDetectionResult {
    /// Prefix -> URI for every namespace declaration seen (empty key = default
    /// namespace).
    pub declarations: IndexMap<String, String>,
    /// ERN version recognised from the declared namespace URIs, if any.
    pub version: Option<ERNVersion>,
    /// Bottom entry of the detector's scope stack at the time the result was
    /// built.
    pub root_scope: NamespaceScope,
    /// Warnings collected during detection and validation.
    pub warnings: Vec<NamespaceWarning>,
    /// URI recorded under the empty prefix in `declarations`, if any.
    pub default_namespace: Option<String>,
    /// Declared namespaces whose URI the registry has no information about.
    pub custom_namespaces: Vec<NamespaceInfo>,
}
53
54impl NamespaceDetector {
55 pub fn new() -> Self {
57 Self {
58 registry: NamespaceRegistry::new(),
59 scope_stack: vec![NamespaceScope::new()],
60 detected_namespaces: IndexMap::new(),
61 namespace_aliases: IndexMap::new(),
62 default_namespace_stack: vec![None],
63 detected_version: None,
64 warnings: Vec::new(),
65 }
66 }
67
68 pub fn detect_from_xml<R: BufRead>(
70 &mut self,
71 reader: R,
72 ) -> Result<NamespaceDetectionResult, ParseError> {
73 self.detect_from_xml_with_security(
74 reader,
75 &crate::parser::security::SecurityConfig::default(),
76 )
77 }
78
79 pub fn detect_from_xml_with_security<R: BufRead>(
81 &mut self,
82 reader: R,
83 security_config: &crate::parser::security::SecurityConfig,
84 ) -> Result<NamespaceDetectionResult, ParseError> {
85 let mut xml_reader = Reader::from_reader(reader);
86 xml_reader.config_mut().trim_text(true);
87
88 xml_reader.config_mut().expand_empty_elements = false;
90 if security_config.disable_dtd {
91 }
93
94 let mut buf = Vec::new();
95 let mut depth = 0;
96 let mut entity_expansions = 0;
97
98 loop {
99 match xml_reader.read_event_into(&mut buf) {
100 Ok(Event::Start(ref e)) => {
101 depth += 1;
102
103 if depth > security_config.max_element_depth {
105 return Err(ParseError::DepthLimitExceeded {
106 depth,
107 max: security_config.max_element_depth,
108 });
109 }
110
111 self.process_start_element(e)?;
112 }
113 Ok(Event::Empty(ref e)) => {
114 depth += 1;
115
116 if depth > security_config.max_element_depth {
118 return Err(ParseError::DepthLimitExceeded {
119 depth,
120 max: security_config.max_element_depth,
121 });
122 }
123
124 self.process_start_element(e)?;
125
126 self.pop_namespace_scope();
128 depth -= 1;
129 }
130 Ok(Event::End(_)) => {
131 self.pop_namespace_scope();
132 depth = depth.saturating_sub(1);
133 }
134 Ok(Event::Text(ref e)) => {
135 let current_pos = xml_reader.buffer_position() as usize;
137 let text = utf8_utils::decode_utf8_at_position(e, current_pos)?;
138
139 if text.contains("&") {
141 entity_expansions += text.matches("&").count();
142 if entity_expansions > security_config.max_entity_expansions {
143 return Err(ParseError::SecurityViolation {
144 message: format!(
145 "Entity expansions {} exceed maximum allowed {}",
146 entity_expansions, security_config.max_entity_expansions
147 ),
148 });
149 }
150 }
151 }
152 Ok(Event::DocType(_)) if security_config.disable_dtd => {
153 return Err(ParseError::SecurityViolation {
154 message: "DTD declarations are disabled for security".to_string(),
155 });
156 }
157 Ok(Event::Eof) => break,
158 Ok(_) => {} Err(e) => {
160 return Err(ParseError::XmlError {
161 message: format!("XML parsing error: {}", e),
162 location: crate::error::ErrorLocation::default(),
163 })
164 }
165 }
166 buf.clear();
167 }
168
169 self.validate_namespaces();
171
172 Ok(self.build_result())
173 }
174
175 fn process_start_element(&mut self, element: &BytesStart) -> Result<(), ParseError> {
177 let current_scope = self.scope_stack.last().unwrap().clone();
179 let mut new_scope = current_scope.new_child();
180
181 let mut _has_namespace_declarations = false;
183 let mut new_default_namespace =
184 self.default_namespace_stack.last().cloned().unwrap_or(None);
185
186 for attr_result in element.attributes() {
187 let attr = attr_result.map_err(|e| ParseError::XmlError {
188 message: format!("Attribute error: {}", e),
189 location: crate::error::ErrorLocation::default(),
190 })?;
191 let key = utf8_utils::decode_attribute_name(attr.key.as_ref(), 0)?;
193 let value = utf8_utils::decode_attribute_value(&attr.value, 0)?;
194
195 if key == "xmlns" {
196 debug!("Found default namespace declaration: {}", value);
198 new_default_namespace = Some(value.clone());
199 new_scope.declare_namespace("".to_string(), value.clone());
200 self.detected_namespaces
201 .insert("".to_string(), value.clone());
202 _has_namespace_declarations = true;
203
204 if let Some(version) = self.registry.detect_version(&value) {
206 if self.detected_version.is_none() {
207 self.detected_version = Some(version);
208 debug!(
209 "Detected ERN version: {:?} from namespace: {}",
210 version, value
211 );
212 }
213 }
214 } else if key.starts_with("xmlns:") {
215 let prefix = key.strip_prefix("xmlns:").unwrap_or("");
217 debug!("Found namespace declaration: {}={}", prefix, value);
218
219 new_scope.declare_namespace(prefix.to_string(), value.clone());
220 self.detected_namespaces
221 .insert(prefix.to_string(), value.clone());
222 _has_namespace_declarations = true;
223
224 self.namespace_aliases
226 .entry(value.clone())
227 .or_default()
228 .push(prefix.to_string());
229
230 if let Some(version) = self.registry.detect_version(&value) {
232 if self.detected_version.is_none() {
233 self.detected_version = Some(version);
234 debug!(
235 "Detected ERN version: {:?} from namespace: {}",
236 version, value
237 );
238 }
239 }
240 }
241 }
242
243 self.scope_stack.push(new_scope);
245 self.default_namespace_stack.push(new_default_namespace);
246
247 Ok(())
248 }
249
250 fn pop_namespace_scope(&mut self) {
252 if self.scope_stack.len() > 1 {
253 self.scope_stack.pop();
254 }
255 if self.default_namespace_stack.len() > 1 {
256 self.default_namespace_stack.pop();
257 }
258 }
259
260 fn validate_namespaces(&mut self) {
262 let validation_warnings = self
263 .registry
264 .validate_declarations(&self.detected_namespaces);
265 self.warnings.extend(validation_warnings);
266 }
267
268 fn build_result(&self) -> NamespaceDetectionResult {
270 let mut custom_namespaces = Vec::new();
272 for (prefix, uri) in &self.detected_namespaces {
273 if self.registry.get_namespace_info(uri).is_none() {
274 let custom_info = NamespaceInfo {
276 uri: uri.clone(),
277 preferred_prefix: prefix.clone(),
278 alternative_prefixes: self
279 .namespace_aliases
280 .get(uri)
281 .cloned()
282 .unwrap_or_default()
283 .into_iter()
284 .filter(|p| p != prefix)
285 .collect(),
286 standard: DDEXStandard::Custom("Unknown".to_string()),
287 version: None,
288 required: false,
289 };
290 custom_namespaces.push(custom_info);
291 }
292 }
293
294 NamespaceDetectionResult {
295 declarations: self.detected_namespaces.clone(),
296 version: self.detected_version,
297 root_scope: self.scope_stack.first().cloned().unwrap_or_default(),
298 warnings: self.warnings.clone(),
299 default_namespace: self.detected_namespaces.get("").cloned(),
300 custom_namespaces,
301 }
302 }
303
304 pub fn current_scope(&self) -> &NamespaceScope {
306 self.scope_stack.last().unwrap()
307 }
308
309 pub fn resolve_prefix(&self, prefix: &str) -> Option<String> {
311 self.current_scope().resolve_prefix(prefix)
312 }
313
314 pub fn get_default_namespace(&self) -> Option<&String> {
316 self.default_namespace_stack.last().unwrap().as_ref()
317 }
318
319 pub fn is_namespace_declared(&self, uri: &str) -> bool {
321 self.current_scope().is_namespace_declared(uri)
322 }
323
324 pub fn find_prefix_for_uri(&self, uri: &str) -> Option<String> {
326 self.current_scope().find_prefix_for_uri(uri)
327 }
328
329 pub fn add_warning(&mut self, warning: NamespaceWarning) {
331 warn!("Namespace warning: {}", warning);
332 self.warnings.push(warning);
333 }
334
335 pub fn get_detected_version(&self) -> Option<ERNVersion> {
337 self.detected_version
338 }
339
340 pub fn get_detected_namespaces(&self) -> &IndexMap<String, String> {
342 &self.detected_namespaces
343 }
344
345 pub fn get_namespace_aliases(&self) -> &IndexMap<String, Vec<String>> {
347 &self.namespace_aliases
348 }
349}
350
#[derive(Debug, Clone)]
/// Namespace information used to resolve element names.
pub struct NamespaceContext {
    /// Scope that `declare_namespace` writes to and that child contexts derive
    /// their own scope from.
    pub current_scope: NamespaceScope,
    /// Prefix -> URI declarations, taken from a detection result's
    /// `declarations`.
    pub document_namespaces: IndexMap<String, String>,
    /// Default namespace URI applied to names without a prefix, if any.
    pub default_namespace: Option<String>,
    /// ERN version taken from a detection result's `version`, if any.
    pub ern_version: Option<ERNVersion>,
}
363
364impl NamespaceContext {
365 pub fn from_detection_result(result: NamespaceDetectionResult) -> Self {
367 Self {
368 current_scope: result.root_scope,
369 document_namespaces: result.declarations,
370 default_namespace: result.default_namespace,
371 ern_version: result.version,
372 }
373 }
374
375 pub fn create_child(&self) -> Self {
377 Self {
378 current_scope: self.current_scope.new_child(),
379 document_namespaces: self.document_namespaces.clone(),
380 default_namespace: self.default_namespace.clone(),
381 ern_version: self.ern_version,
382 }
383 }
384
385 pub fn declare_namespace(&mut self, prefix: String, uri: String) {
387 self.current_scope.declare_namespace(prefix, uri);
388 }
389
390 pub fn resolve_element_name(&self, local_name: &str, prefix: Option<&str>) -> ResolvedName {
392 match prefix {
393 Some(p) => {
394 if let Some(uri) = self.document_namespaces.get(p) {
395 ResolvedName::Qualified {
396 local_name: local_name.to_string(),
397 namespace_uri: uri.clone(),
398 prefix: p.to_string(),
399 }
400 } else {
401 ResolvedName::Unresolved {
402 local_name: local_name.to_string(),
403 prefix: Some(p.to_string()),
404 }
405 }
406 }
407 None => {
408 if let Some(uri) = &self.default_namespace {
410 ResolvedName::Qualified {
411 local_name: local_name.to_string(),
412 namespace_uri: uri.clone(),
413 prefix: "".to_string(),
414 }
415 } else {
416 ResolvedName::Unqualified {
417 local_name: local_name.to_string(),
418 }
419 }
420 }
421 }
422 }
423}
424
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
/// Outcome of resolving an element name against the known namespaces.
///
/// Every field is a `String` or `Option<String>`, so the type is `Eq` and
/// `Hash` and can be used as a set member or map key.
pub enum ResolvedName {
    /// The name belongs to a namespace.
    Qualified {
        /// Element name without its prefix.
        local_name: String,
        /// URI of the namespace the name resolved to.
        namespace_uri: String,
        /// Prefix used for the lookup; empty for the default namespace.
        prefix: String,
    },
    /// The name has no prefix and no default namespace applies.
    Unqualified {
        /// Element name.
        local_name: String,
    },
    /// The name carries a prefix that is not bound to any namespace.
    Unresolved {
        /// Element name without its prefix.
        local_name: String,
        /// The prefix that could not be resolved.
        prefix: Option<String>,
    },
}
442
443impl Default for NamespaceDetector {
444 fn default() -> Self {
445 Self::new()
446 }
447}
448
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Runs a fresh detector over `xml`, panicking if detection fails.
    fn detect(xml: &str) -> NamespaceDetectionResult {
        NamespaceDetector::new()
            .detect_from_xml(Cursor::new(xml.as_bytes()))
            .unwrap()
    }

    /// Prefixed ERN 4.3 declarations are collected and the version recognised.
    #[test]
    fn test_namespace_detection_ern_43() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43"
                       xmlns:avs="http://ddex.net/xml/avs"
                       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <ern:MessageHeader>
        <ern:MessageId>MSG001</ern:MessageId>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#,
        );

        assert_eq!(result.version, Some(ERNVersion::V4_3));
        for prefix in &["ern", "avs", "xsi"] {
            assert!(result.declarations.contains_key(*prefix));
        }
        assert_eq!(
            result.declarations.get("ern").map(String::as_str),
            Some("http://ddex.net/xml/ern/43")
        );
    }

    /// A default (`xmlns`) declaration is reported under the empty prefix.
    #[test]
    fn test_default_namespace_detection() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<NewReleaseMessage xmlns="http://ddex.net/xml/ern/42"
                   xmlns:avs="http://ddex.net/xml/avs">
    <MessageHeader>
        <MessageId>MSG001</MessageId>
    </MessageHeader>
</NewReleaseMessage>"#,
        );

        assert_eq!(result.version, Some(ERNVersion::V4_2));
        assert_eq!(
            result.default_namespace.as_deref(),
            Some("http://ddex.net/xml/ern/42")
        );
        assert!(result.declarations.contains_key(""));
    }

    /// A namespace the registry does not know is listed as custom.
    #[test]
    fn test_custom_namespace_detection() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43"
                       xmlns:custom="http://example.com/custom">
    <ern:MessageHeader>
        <custom:CustomElement>Test</custom:CustomElement>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#,
        );

        assert_eq!(result.custom_namespaces.len(), 1);
        let custom = &result.custom_namespaces[0];
        assert_eq!(custom.uri, "http://example.com/custom");
        assert_eq!(custom.preferred_prefix, "custom");
    }

    /// Declarations made on nested elements are collected alongside root ones.
    #[test]
    fn test_namespace_scope_inheritance() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
    <ern:MessageHeader xmlns:local="http://example.com/local">
        <local:LocalElement>
            <ern:ErnElement />
        </local:LocalElement>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#,
        );

        assert!(result.declarations.contains_key("ern"));
        assert!(result.declarations.contains_key("local"));
    }

    /// A context built from a detection result resolves declared prefixes.
    #[test]
    fn test_namespace_context() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43"
                       xmlns:avs="http://ddex.net/xml/avs">
</ern:NewReleaseMessage>"#,
        );

        let context = NamespaceContext::from_detection_result(result);
        let resolved = context.resolve_element_name("MessageHeader", Some("ern"));

        if let ResolvedName::Qualified {
            local_name,
            namespace_uri,
            prefix,
        } = resolved
        {
            assert_eq!(local_name, "MessageHeader");
            assert_eq!(namespace_uri, "http://ddex.net/xml/ern/43");
            assert_eq!(prefix, "ern");
        } else {
            panic!("Expected qualified name");
        }
    }
}