1use crate::error::ParseError;
7use crate::utf8_utils;
8use ddex_core::models::versions::ERNVersion;
9use ddex_core::namespace::{
10 DDEXStandard, NamespaceInfo, NamespaceRegistry, NamespaceScope, NamespaceWarning,
11};
12use indexmap::IndexMap;
13use quick_xml::events::{BytesStart, Event};
14use quick_xml::Reader;
15use std::io::BufRead;
16use tracing::{debug, warn};
17
18#[derive(Debug, Clone)]
20pub struct NamespaceDetector {
21 registry: NamespaceRegistry,
23 scope_stack: Vec<NamespaceScope>,
25 detected_namespaces: IndexMap<String, String>, namespace_aliases: IndexMap<String, Vec<String>>, default_namespace_stack: Vec<Option<String>>,
31 detected_version: Option<ERNVersion>,
33 warnings: Vec<NamespaceWarning>,
35}
36
37#[derive(Debug, Clone)]
39pub struct NamespaceDetectionResult {
40 pub declarations: IndexMap<String, String>,
42 pub version: Option<ERNVersion>,
44 pub root_scope: NamespaceScope,
46 pub warnings: Vec<NamespaceWarning>,
48 pub default_namespace: Option<String>,
50 pub custom_namespaces: Vec<NamespaceInfo>,
52}
53
54impl NamespaceDetector {
55 pub fn new() -> Self {
57 Self {
58 registry: NamespaceRegistry::new(),
59 scope_stack: vec![NamespaceScope::new()],
60 detected_namespaces: IndexMap::new(),
61 namespace_aliases: IndexMap::new(),
62 default_namespace_stack: vec![None],
63 detected_version: None,
64 warnings: Vec::new(),
65 }
66 }
67
68 pub fn detect_from_xml<R: BufRead>(
70 &mut self,
71 reader: R,
72 ) -> Result<NamespaceDetectionResult, ParseError> {
73 self.detect_from_xml_with_security(
74 reader,
75 &crate::parser::security::SecurityConfig::default(),
76 )
77 }
78
79 pub fn detect_from_xml_with_security<R: BufRead>(
81 &mut self,
82 reader: R,
83 security_config: &crate::parser::security::SecurityConfig,
84 ) -> Result<NamespaceDetectionResult, ParseError> {
85 let mut xml_reader = Reader::from_reader(reader);
86 xml_reader.config_mut().trim_text(true);
87
88 xml_reader.config_mut().expand_empty_elements = false;
90 if security_config.disable_dtd {
91 }
93
94 let mut buf = Vec::new();
95 let mut depth = 0;
96 let mut entity_expansions = 0;
97
98 loop {
99 match xml_reader.read_event_into(&mut buf) {
100 Ok(Event::Start(ref e)) => {
101 depth += 1;
102
103 if depth > security_config.max_element_depth {
105 return Err(ParseError::DepthLimitExceeded {
106 depth,
107 limit: security_config.max_element_depth,
108 });
109 }
110
111 self.process_start_element(e)?;
112 }
113 Ok(Event::Empty(ref e)) => {
114 depth += 1;
115
116 if depth > security_config.max_element_depth {
118 return Err(ParseError::DepthLimitExceeded {
119 depth,
120 limit: security_config.max_element_depth,
121 });
122 }
123
124 self.process_start_element(e)?;
125
126 self.pop_namespace_scope();
128 depth -= 1;
129 }
130 Ok(Event::End(_)) => {
131 self.pop_namespace_scope();
132 depth = depth.saturating_sub(1);
133 }
134 Ok(Event::Text(ref e)) => {
135 let current_pos = xml_reader.buffer_position() as usize;
137 let text = utf8_utils::decode_utf8_at_position(e, current_pos)?;
138
139 if text.contains("&") {
141 entity_expansions += text.matches("&").count();
142 if entity_expansions > security_config.max_entity_expansions {
143 return Err(ParseError::SecurityViolation {
144 message: format!(
145 "Entity expansions {} exceed maximum allowed {}",
146 entity_expansions, security_config.max_entity_expansions
147 ),
148 });
149 }
150 }
151 }
152 Ok(Event::DocType(_)) if security_config.disable_dtd => {
153 return Err(ParseError::SecurityViolation {
154 message: "DTD declarations are disabled for security".to_string(),
155 });
156 }
157 Ok(Event::Eof) => break,
158 Ok(_) => {} Err(e) => {
160 return Err(ParseError::XmlError(format!("XML parsing error: {}", e)));
161 }
162 }
163 buf.clear();
164 }
165
166 self.validate_namespaces();
168
169 Ok(self.build_result())
170 }
171
172 fn process_start_element(&mut self, element: &BytesStart) -> Result<(), ParseError> {
174 let current_scope = self.scope_stack.last().unwrap().clone();
176 let mut new_scope = current_scope.new_child();
177
178 let mut _has_namespace_declarations = false;
180 let mut new_default_namespace =
181 self.default_namespace_stack.last().cloned().unwrap_or(None);
182
183 for attr_result in element.attributes() {
184 let attr = attr_result.map_err(|e| ParseError::XmlError(format!("Attribute error: {}", e)))?;
185 let key = utf8_utils::decode_attribute_name(attr.key.as_ref(), 0)?;
187 let value = utf8_utils::decode_attribute_value(&attr.value, 0)?;
188
189 if key == "xmlns" {
190 debug!("Found default namespace declaration: {}", value);
192 new_default_namespace = Some(value.clone());
193 new_scope.declare_namespace("".to_string(), value.clone());
194 self.detected_namespaces
195 .insert("".to_string(), value.clone());
196 _has_namespace_declarations = true;
197
198 if let Some(version) = self.registry.detect_version(&value) {
200 if self.detected_version.is_none() {
201 self.detected_version = Some(version);
202 debug!(
203 "Detected ERN version: {:?} from namespace: {}",
204 version, value
205 );
206 }
207 }
208 } else if key.starts_with("xmlns:") {
209 let prefix = key.strip_prefix("xmlns:").unwrap_or("");
211 debug!("Found namespace declaration: {}={}", prefix, value);
212
213 new_scope.declare_namespace(prefix.to_string(), value.clone());
214 self.detected_namespaces
215 .insert(prefix.to_string(), value.clone());
216 _has_namespace_declarations = true;
217
218 self.namespace_aliases
220 .entry(value.clone())
221 .or_default()
222 .push(prefix.to_string());
223
224 if let Some(version) = self.registry.detect_version(&value) {
226 if self.detected_version.is_none() {
227 self.detected_version = Some(version);
228 debug!(
229 "Detected ERN version: {:?} from namespace: {}",
230 version, value
231 );
232 }
233 }
234 }
235 }
236
237 self.scope_stack.push(new_scope);
239 self.default_namespace_stack.push(new_default_namespace);
240
241 Ok(())
242 }
243
244 fn pop_namespace_scope(&mut self) {
246 if self.scope_stack.len() > 1 {
247 self.scope_stack.pop();
248 }
249 if self.default_namespace_stack.len() > 1 {
250 self.default_namespace_stack.pop();
251 }
252 }
253
254 fn validate_namespaces(&mut self) {
256 let validation_warnings = self
257 .registry
258 .validate_declarations(&self.detected_namespaces);
259 self.warnings.extend(validation_warnings);
260 }
261
262 fn build_result(&self) -> NamespaceDetectionResult {
264 let mut custom_namespaces = Vec::new();
266 for (prefix, uri) in &self.detected_namespaces {
267 if self.registry.get_namespace_info(uri).is_none() {
268 let custom_info = NamespaceInfo {
270 uri: uri.clone(),
271 preferred_prefix: prefix.clone(),
272 alternative_prefixes: self
273 .namespace_aliases
274 .get(uri)
275 .cloned()
276 .unwrap_or_default()
277 .into_iter()
278 .filter(|p| p != prefix)
279 .collect(),
280 standard: DDEXStandard::Custom("Unknown".to_string()),
281 version: None,
282 required: false,
283 };
284 custom_namespaces.push(custom_info);
285 }
286 }
287
288 NamespaceDetectionResult {
289 declarations: self.detected_namespaces.clone(),
290 version: self.detected_version,
291 root_scope: self.scope_stack.first().cloned().unwrap_or_default(),
292 warnings: self.warnings.clone(),
293 default_namespace: self.detected_namespaces.get("").cloned(),
294 custom_namespaces,
295 }
296 }
297
298 pub fn current_scope(&self) -> &NamespaceScope {
300 self.scope_stack.last().unwrap()
301 }
302
303 pub fn resolve_prefix(&self, prefix: &str) -> Option<String> {
305 self.current_scope().resolve_prefix(prefix)
306 }
307
308 pub fn get_default_namespace(&self) -> Option<&String> {
310 self.default_namespace_stack.last().unwrap().as_ref()
311 }
312
313 pub fn is_namespace_declared(&self, uri: &str) -> bool {
315 self.current_scope().is_namespace_declared(uri)
316 }
317
318 pub fn find_prefix_for_uri(&self, uri: &str) -> Option<String> {
320 self.current_scope().find_prefix_for_uri(uri)
321 }
322
323 pub fn add_warning(&mut self, warning: NamespaceWarning) {
325 warn!("Namespace warning: {}", warning);
326 self.warnings.push(warning);
327 }
328
329 pub fn get_detected_version(&self) -> Option<ERNVersion> {
331 self.detected_version
332 }
333
334 pub fn get_detected_namespaces(&self) -> &IndexMap<String, String> {
336 &self.detected_namespaces
337 }
338
339 pub fn get_namespace_aliases(&self) -> &IndexMap<String, Vec<String>> {
341 &self.namespace_aliases
342 }
343}
344
345#[derive(Debug, Clone)]
347pub struct NamespaceContext {
348 pub current_scope: NamespaceScope,
350 pub document_namespaces: IndexMap<String, String>,
352 pub default_namespace: Option<String>,
354 pub ern_version: Option<ERNVersion>,
356}
357
358impl NamespaceContext {
359 pub fn from_detection_result(result: NamespaceDetectionResult) -> Self {
361 Self {
362 current_scope: result.root_scope,
363 document_namespaces: result.declarations,
364 default_namespace: result.default_namespace,
365 ern_version: result.version,
366 }
367 }
368
369 pub fn create_child(&self) -> Self {
371 Self {
372 current_scope: self.current_scope.new_child(),
373 document_namespaces: self.document_namespaces.clone(),
374 default_namespace: self.default_namespace.clone(),
375 ern_version: self.ern_version,
376 }
377 }
378
379 pub fn declare_namespace(&mut self, prefix: String, uri: String) {
381 self.current_scope.declare_namespace(prefix, uri);
382 }
383
384 pub fn resolve_element_name(&self, local_name: &str, prefix: Option<&str>) -> ResolvedName {
386 match prefix {
387 Some(p) => {
388 if let Some(uri) = self.document_namespaces.get(p) {
389 ResolvedName::Qualified {
390 local_name: local_name.to_string(),
391 namespace_uri: uri.clone(),
392 prefix: p.to_string(),
393 }
394 } else {
395 ResolvedName::Unresolved {
396 local_name: local_name.to_string(),
397 prefix: Some(p.to_string()),
398 }
399 }
400 }
401 None => {
402 if let Some(uri) = &self.default_namespace {
404 ResolvedName::Qualified {
405 local_name: local_name.to_string(),
406 namespace_uri: uri.clone(),
407 prefix: "".to_string(),
408 }
409 } else {
410 ResolvedName::Unqualified {
411 local_name: local_name.to_string(),
412 }
413 }
414 }
415 }
416 }
417}
418
/// Outcome of resolving an element name against the known namespaces.
///
/// All payloads are plain strings, so the type is `Eq` and `Hash` as well as
/// `PartialEq` and can be used as a set member or map key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ResolvedName {
    /// The name belongs to a known namespace.
    Qualified {
        /// Element name without its prefix.
        local_name: String,
        /// URI of the namespace the name resolved to.
        namespace_uri: String,
        /// Prefix used for resolution; empty when the default namespace applied.
        prefix: String,
    },
    /// The name has no prefix and no default namespace applies.
    Unqualified {
        /// Element name as written.
        local_name: String,
    },
    /// The name carries a prefix that could not be resolved.
    Unresolved {
        /// Element name without its prefix.
        local_name: String,
        /// The prefix that failed to resolve.
        prefix: Option<String>,
    },
}
436
437impl Default for NamespaceDetector {
438 fn default() -> Self {
439 Self::new()
440 }
441}
442
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Runs a fresh detector over `xml` and unwraps the result.
    fn detect(xml: &str) -> NamespaceDetectionResult {
        let mut detector = NamespaceDetector::new();
        detector
            .detect_from_xml(Cursor::new(xml.as_bytes()))
            .unwrap()
    }

    /// Prefixed ERN 4.3 declarations yield the version and all three prefixes.
    #[test]
    fn test_namespace_detection_ern_43() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43"
                       xmlns:avs="http://ddex.net/xml/avs"
                       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <ern:MessageHeader>
        <ern:MessageId>MSG001</ern:MessageId>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#,
        );

        assert_eq!(result.version, Some(ERNVersion::V4_3));
        for prefix in ["ern", "avs", "xsi"] {
            assert!(result.declarations.contains_key(prefix));
        }
        assert_eq!(
            result.declarations.get("ern"),
            Some(&"http://ddex.net/xml/ern/43".to_string())
        );
    }

    /// A default (`xmlns`) declaration is reported under the empty prefix.
    #[test]
    fn test_default_namespace_detection() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<NewReleaseMessage xmlns="http://ddex.net/xml/ern/42"
                   xmlns:avs="http://ddex.net/xml/avs">
    <MessageHeader>
        <MessageId>MSG001</MessageId>
    </MessageHeader>
</NewReleaseMessage>"#,
        );

        assert_eq!(result.version, Some(ERNVersion::V4_2));
        assert_eq!(
            result.default_namespace,
            Some("http://ddex.net/xml/ern/42".to_string())
        );
        assert!(result.declarations.contains_key(""));
    }

    /// A namespace unknown to the registry is listed as custom.
    #[test]
    fn test_custom_namespace_detection() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43"
                       xmlns:custom="http://example.com/custom">
    <ern:MessageHeader>
        <custom:CustomElement>Test</custom:CustomElement>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#,
        );

        assert_eq!(result.custom_namespaces.len(), 1);
        let custom = &result.custom_namespaces[0];
        assert_eq!(custom.uri, "http://example.com/custom");
        assert_eq!(custom.preferred_prefix, "custom");
    }

    /// Declarations on nested elements are recorded alongside root ones.
    #[test]
    fn test_namespace_scope_inheritance() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43">
    <ern:MessageHeader xmlns:local="http://example.com/local">
        <local:LocalElement>
            <ern:ErnElement />
        </local:LocalElement>
    </ern:MessageHeader>
</ern:NewReleaseMessage>"#,
        );

        assert!(result.declarations.contains_key("ern"));
        assert!(result.declarations.contains_key("local"));
    }

    /// A context built from a detection result resolves a declared prefix.
    #[test]
    fn test_namespace_context() {
        let result = detect(
            r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43"
                       xmlns:avs="http://ddex.net/xml/avs">
</ern:NewReleaseMessage>"#,
        );

        let context = NamespaceContext::from_detection_result(result);
        let resolved = context.resolve_element_name("MessageHeader", Some("ern"));

        if let ResolvedName::Qualified {
            local_name,
            namespace_uri,
            prefix,
        } = resolved
        {
            assert_eq!(local_name, "MessageHeader");
            assert_eq!(namespace_uri, "http://ddex.net/xml/ern/43");
            assert_eq!(prefix, "ern");
        } else {
            panic!("Expected qualified name");
        }
    }
}