1use crate::utf8_utils;
8use ddex_core::models::{
9 extensions::utils, Comment, CommentPosition, Extensions, ProcessingInstruction, XmlFragment,
10};
11use indexmap::IndexMap;
12use quick_xml::{
13 events::{BytesEnd, BytesStart, BytesText, Event},
14 Reader,
15};
16
17#[derive(Debug, Clone)]
19pub struct ExtensionCaptureContext {
20 pub element_path: Vec<String>,
22
23 pub namespace_context: IndexMap<String, String>,
25
26 pub in_extension: bool,
28
29 pub extension_depth: usize,
31
32 pub extension_buffer: String,
34
35 pub current_extension: Option<XmlFragment>,
37
38 pub extensions: Extensions,
40
41 pub current_line: usize,
43
44 pub current_column: usize,
46}
47
48impl Default for ExtensionCaptureContext {
49 fn default() -> Self {
50 Self::new()
51 }
52}
53
54impl ExtensionCaptureContext {
55 pub fn new() -> Self {
57 Self {
58 element_path: Vec::new(),
59 namespace_context: IndexMap::new(),
60 in_extension: false,
61 extension_depth: 0,
62 extension_buffer: String::new(),
63 current_extension: None,
64 extensions: Extensions::new(),
65 current_line: 1,
66 current_column: 1,
67 }
68 }
69
70 pub fn enter_element(&mut self, element_name: &str) {
72 self.element_path.push(element_name.to_string());
73 }
74
75 pub fn exit_element(&mut self) -> Option<String> {
77 self.element_path.pop()
78 }
79
80 pub fn current_path(&self) -> String {
82 self.element_path.join("/")
83 }
84
85 pub fn add_namespace_declaration(&mut self, prefix: String, uri: String) {
87 self.namespace_context.insert(prefix.clone(), uri.clone());
88
89 if !utils::is_ddex_namespace(&uri) {
91 self.extensions.add_global_namespace(prefix, uri);
92 }
93 }
94
95 pub fn should_capture_element(&self, _element_name: &str, namespace_uri: Option<&str>) -> bool {
97 if self.in_extension {
99 return true;
100 }
101
102 if let Some(ns_uri) = namespace_uri {
104 return !utils::is_ddex_namespace(ns_uri);
105 }
106
107 false
110 }
111
112 pub fn start_extension_capture(
114 &mut self,
115 element_name: &str,
116 namespace_uri: Option<&str>,
117 namespace_prefix: Option<&str>,
118 ) {
119 self.in_extension = true;
120 self.extension_depth = 1;
121 self.extension_buffer.clear();
122
123 self.current_extension = Some(XmlFragment::with_namespace(
124 element_name.to_string(),
125 namespace_uri.map(String::from),
126 namespace_prefix.map(String::from),
127 String::new(), ));
129 }
130
131 pub fn add_extension_content(&mut self, content: &str) {
133 if self.in_extension {
134 self.extension_buffer.push_str(content);
135 }
136 }
137
138 pub fn process_extension_start_tag(&mut self, event: &BytesStart) {
140 if !self.in_extension {
141 return;
142 }
143
144 self.extension_depth += 1;
145 self.extension_buffer.push('<');
146 let element_name = utf8_utils::process_text_content_lossy(event.name().as_ref());
147 self.extension_buffer.push_str(&element_name);
148
149 for attr in event.attributes().flatten() {
151 self.extension_buffer.push(' ');
152 let key = utf8_utils::process_text_content_lossy(attr.key.as_ref());
153 let value = utf8_utils::process_text_content_lossy(&attr.value);
154
155 self.extension_buffer.push_str(&key);
156 self.extension_buffer.push_str("=\"");
157 self.extension_buffer.push_str(&value);
158 self.extension_buffer.push('"');
159
160 if let Some(ref mut ext) = self.current_extension {
162 ext.add_attribute(key, value);
163 }
164 }
165
166 self.extension_buffer.push('>');
167 }
168
169 pub fn process_extension_end_tag(&mut self, event: &BytesEnd) {
171 if !self.in_extension {
172 return;
173 }
174
175 self.extension_buffer.push_str("</");
176 self.extension_buffer
177 .push_str(std::str::from_utf8(event.name().as_ref()).unwrap_or("unknown"));
178 self.extension_buffer.push('>');
179
180 self.extension_depth -= 1;
181
182 if self.extension_depth == 0 {
184 self.finish_extension_capture();
185 }
186 }
187
188 pub fn process_extension_text(&mut self, event: &BytesText) {
190 if !self.in_extension {
191 return;
192 }
193
194 let text = event.unescape().unwrap_or_default();
195 self.extension_buffer.push_str(&text);
196
197 if let Some(ref mut ext) = self.current_extension {
199 if ext.children.is_empty() {
200 ext.text_content = Some(text.to_string());
201 }
202 }
203 }
204
205 pub fn finish_extension_capture(&mut self) {
207 if let Some(mut extension) = self.current_extension.take() {
208 extension.raw_content = self.extension_buffer.clone();
209
210 let namespace_uri = extension.namespace_uri.as_deref();
212 let location_key = utils::generate_location_key(
213 &self
214 .element_path
215 .iter()
216 .map(|s| s.as_str())
217 .collect::<Vec<_>>(),
218 namespace_uri,
219 &extension.element_name,
220 );
221
222 self.extensions.add_fragment(location_key, extension);
223 }
224
225 self.in_extension = false;
226 self.extension_depth = 0;
227 self.extension_buffer.clear();
228 }
229
230 pub fn add_processing_instruction(&mut self, target: String, data: Option<String>) {
232 let pi = ProcessingInstruction::new(target, data);
233 self.extensions.add_document_processing_instruction(pi);
234 }
235
236 pub fn add_comment(&mut self, comment: String) {
238 self.extensions.add_document_comment(comment);
239 }
240
241 pub fn add_comment_with_position(
243 &mut self,
244 comment: String,
245 position: CommentPosition,
246 line_number: Option<usize>,
247 column_number: Option<usize>,
248 ) {
249 let xpath = if !self.element_path.is_empty() {
250 Some(format!("/{}", self.element_path.join("/")))
251 } else {
252 None
253 };
254
255 let comment_struct =
256 Comment::with_location(comment, position, xpath, line_number, column_number);
257
258 if self.element_path.is_empty()
259 || matches!(position, CommentPosition::Before | CommentPosition::After)
260 {
261 self.extensions
263 .add_document_comment_structured(comment_struct);
264 } else {
265 if let Some(ref mut ext) = self.current_extension {
267 ext.comments.push(comment_struct);
268 } else {
269 self.extensions
271 .add_document_comment_structured(comment_struct);
272 }
273 }
274 }
275
276 pub fn into_extensions(self) -> Extensions {
278 self.extensions
279 }
280}
281
282pub struct ExtensionAwareParser {
284 pub context: ExtensionCaptureContext,
286
287 pub capture_extensions: bool,
289}
290
291impl ExtensionAwareParser {
292 pub fn new(capture_extensions: bool) -> Self {
294 Self {
295 context: ExtensionCaptureContext::new(),
296 capture_extensions,
297 }
298 }
299
300 pub fn parse_with_extensions(
302 &mut self,
303 xml_content: &str,
304 ) -> Result<Extensions, Box<dyn std::error::Error>> {
305 if !self.capture_extensions {
306 return Ok(Extensions::new());
307 }
308
309 let mut reader = Reader::from_str(xml_content);
310 reader.config_mut().trim_text(true);
311
312 let mut buf = Vec::new();
313
314 loop {
315 match reader.read_event_into(&mut buf) {
316 Ok(Event::Start(ref e)) => {
317 let element_name_bytes = e.name();
318 let element_name =
319 std::str::from_utf8(element_name_bytes.as_ref()).unwrap_or("unknown");
320
321 let (namespace_uri, namespace_prefix) = self.extract_namespace_info(e);
323
324 for attr in e.attributes().flatten() {
326 let key = std::str::from_utf8(attr.key.as_ref()).unwrap_or("");
327 if key.starts_with("xmlns") {
328 let prefix = if key == "xmlns" {
329 "".to_string()
330 } else {
331 key.strip_prefix("xmlns:").unwrap_or("").to_string()
332 };
333 let uri = String::from_utf8_lossy(&attr.value).to_string();
334 self.context.add_namespace_declaration(prefix, uri);
335 }
336 }
337
338 if self
340 .context
341 .should_capture_element(element_name, namespace_uri.as_deref())
342 {
343 if !self.context.in_extension {
344 self.context.start_extension_capture(
345 element_name,
346 namespace_uri.as_deref(),
347 namespace_prefix.as_deref(),
348 );
349 }
350 self.context.process_extension_start_tag(e);
351 } else {
352 self.context.enter_element(element_name);
353 }
354 }
355 Ok(Event::End(ref e)) => {
356 if self.context.in_extension {
357 self.context.process_extension_end_tag(e);
358 } else {
359 self.context.exit_element();
360 }
361 }
362 Ok(Event::Text(ref e)) => {
363 if self.context.in_extension {
364 self.context.process_extension_text(e);
365 }
366 }
367 Ok(Event::Comment(ref e)) => {
368 let comment = String::from_utf8_lossy(e);
369 if self.context.in_extension {
370 self.context
371 .add_extension_content(&format!("<!--{}-->", comment));
372 } else {
373 let position = if self.context.element_path.is_empty() {
375 CommentPosition::Before
376 } else {
377 CommentPosition::FirstChild
378 };
379
380 self.context.add_comment_with_position(
381 comment.trim().to_string(),
382 position,
383 Some(self.context.current_line),
384 Some(self.context.current_column),
385 );
386 }
387 }
388 Ok(Event::PI(ref e)) => {
389 let content = String::from_utf8_lossy(e);
390 if let Some(space_pos) = content.find(char::is_whitespace) {
392 let target = content[..space_pos].to_string();
393 let data = content[space_pos..].trim().to_string();
394 let data = if data.is_empty() { None } else { Some(data) };
395 self.context.add_processing_instruction(target, data);
396 } else {
397 self.context
398 .add_processing_instruction(content.to_string(), None);
399 }
400 }
401 Ok(Event::Eof) => break,
402 Err(e) => {
403 eprintln!("Warning: XML parsing error during extension capture: {}", e);
405 }
406 _ => {}
407 }
408 buf.clear();
409 }
410
411 Ok(self.context.extensions.clone())
412 }
413
414 fn extract_namespace_info(&self, event: &BytesStart) -> (Option<String>, Option<String>) {
416 let name_bytes = event.name();
417 let name = std::str::from_utf8(name_bytes.as_ref()).unwrap_or("unknown");
418
419 if let Some(colon_pos) = name.find(':') {
420 let prefix = &name[..colon_pos];
421 let namespace_uri = self.context.namespace_context.get(prefix).cloned();
422 (namespace_uri, Some(prefix.to_string()))
423 } else {
424 let default_ns = self.context.namespace_context.get("").cloned();
426 (default_ns, None)
427 }
428 }
429}
430
431pub mod capture_utils {
433 use super::*;
434
435 pub fn extract_extensions(xml_content: &str) -> Result<Extensions, Box<dyn std::error::Error>> {
437 let mut parser = ExtensionAwareParser::new(true);
438 parser.parse_with_extensions(xml_content)
439 }
440
441 pub fn has_extensions(xml_content: &str) -> bool {
443 match extract_extensions(xml_content) {
444 Ok(extensions) => !extensions.is_empty(),
445 Err(_) => false,
446 }
447 }
448
449 pub fn get_extension_stats(xml_content: &str) -> ExtensionStats {
451 match extract_extensions(xml_content) {
452 Ok(extensions) => ExtensionStats::from_extensions(&extensions),
453 Err(_) => ExtensionStats::default(),
454 }
455 }
456
457 #[derive(Debug, Clone, Default)]
459 pub struct ExtensionStats {
460 pub fragment_count: usize,
461 pub namespace_count: usize,
462 pub comment_count: usize,
463 pub processing_instruction_count: usize,
464 pub unique_namespaces: Vec<String>,
465 }
466
467 impl ExtensionStats {
468 fn from_extensions(extensions: &Extensions) -> Self {
469 let unique_namespaces = extensions.global_namespaces.values().cloned().collect();
470
471 Self {
472 fragment_count: extensions.fragments.len(),
473 namespace_count: extensions.global_namespaces.len(),
474 comment_count: extensions.document_comments.len(),
475 processing_instruction_count: extensions.document_processing_instructions.len(),
476 unique_namespaces,
477 }
478 }
479 }
480}
481
#[cfg(test)]
mod tests {
    use super::*;

    /// Entering and leaving elements is reflected in the slash-joined path.
    #[test]
    fn test_extension_capture_context() {
        let mut ctx = ExtensionCaptureContext::new();

        for name in ["message", "header"] {
            ctx.enter_element(name);
        }
        assert_eq!(ctx.current_path(), "message/header");

        ctx.exit_element();
        assert_eq!(ctx.current_path(), "message");
    }

    /// DDEX namespaces are not captured; foreign namespaces are.
    #[test]
    fn test_namespace_detection() {
        let ctx = ExtensionCaptureContext::new();

        let ddex_captured =
            ctx.should_capture_element("Release", Some("http://ddex.net/xml/ern/43"));
        assert!(!ddex_captured);

        let custom_captured =
            ctx.should_capture_element("customElement", Some("http://example.com/custom"));
        assert!(custom_captured);
    }

    /// A document with a foreign namespace yields that namespace as a global
    /// extension namespace.
    #[test]
    fn test_extension_parsing() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<ern:NewReleaseMessage xmlns:ern="http://ddex.net/xml/ern/43" xmlns:custom="http://example.com/custom">
    <MessageHeader>
        <MessageId>MSG123</MessageId>
        <custom:CustomField>Custom Value</custom:CustomField>
    </MessageHeader>
    <custom:CustomSection attr="value">
        <custom:NestedElement>Nested Content</custom:NestedElement>
    </custom:CustomSection>
</ern:NewReleaseMessage>"#;

        let extensions = capture_utils::extract_extensions(xml).unwrap();
        let namespaces = &extensions.global_namespaces;

        assert!(!extensions.is_empty());
        assert!(namespaces.contains_key("custom"));
        assert_eq!(namespaces["custom"], "http://example.com/custom");
    }

    /// A processing instruction before the root element is recorded with its
    /// target.
    #[test]
    fn test_processing_instruction_capture() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<?custom-instruction data="value"?>
<root>content</root>"#;

        let extensions = capture_utils::extract_extensions(xml).unwrap();
        let instructions = &extensions.document_processing_instructions;

        assert!(!instructions.is_empty());
        assert_eq!(instructions[0].target, "custom-instruction");
    }

    /// Comments outside of extensions end up in the document comments.
    #[test]
    fn test_comment_capture() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<!-- This is a document comment -->
<root>
    <!-- This is an element comment -->
    content
</root>"#;

        let extensions = capture_utils::extract_extensions(xml).unwrap();
        assert!(!extensions.document_comments.is_empty());
    }
}