ddex_core/models/
extensions.rs

1//! Extension preservation system for perfect round-trip fidelity
2//!
3//! This module provides comprehensive XML fragment preservation to maintain
4//! unknown elements, namespaces, and attributes that are not part of the
5//! DDEX schema. This ensures that proprietary extensions from music companies
6//! or custom implementations are preserved during parse → modify → build cycles.
7
8use serde::{Deserialize, Serialize};
9use indexmap::IndexMap;
10use std::collections::HashMap;
11
/// Comprehensive XML fragment preservation for round-trip fidelity.
///
/// One value describes a single XML element: its verbatim source text
/// (`raw_content`) alongside structured fields (name, namespace, attributes,
/// children, ...) that `to_canonical_xml` uses to rebuild the element.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct XmlFragment {
    /// The raw XML content as a string.
    ///
    /// `to_canonical_xml` returns this unchanged when `preserve_formatting`
    /// is `true`.
    pub raw_content: String,

    /// Element name (local name without namespace prefix)
    pub element_name: String,

    /// Namespace URI for this element (`None` when no namespace is recorded)
    pub namespace_uri: Option<String>,

    /// Namespace prefix used in the original XML; combined with
    /// `element_name` by `qualified_name`
    pub namespace_prefix: Option<String>,

    /// All namespace declarations on this element.
    ///
    /// An empty prefix key is written as the default `xmlns="..."`
    /// declaration; any other key is written as `xmlns:prefix="..."`.
    pub namespace_declarations: IndexMap<String, String>, // prefix -> uri

    /// All attributes on this element (including namespaced ones)
    pub attributes: IndexMap<String, String>, // qualified name -> value

    /// Child XML fragments (for nested unknown elements)
    pub children: Vec<XmlFragment>,

    /// Text content (if this element contains only text).
    ///
    /// When this is `Some`, `to_canonical_xml` writes the escaped text between
    /// the tags and does not emit children, processing instructions or comments.
    pub text_content: Option<String>,

    /// Processing instructions within this fragment
    pub processing_instructions: Vec<ProcessingInstruction>,

    /// Comments within this fragment
    pub comments: Vec<Comment>,

    /// Position hint for canonical ordering.
    ///
    /// NOTE(review): nothing in this file reads the hint; presumably the
    /// builder consumes it — confirm against the caller.
    pub position_hint: Option<usize>,

    /// Whether this fragment should be preserved as-is (no canonicalization)
    pub preserve_formatting: bool,
}
51
52/// Position of a comment relative to its parent element
53#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
54pub enum CommentPosition {
55    /// Comment appears before the element's opening tag
56    Before,
57    /// Comment appears after the element's opening tag but before any child content
58    FirstChild,
59    /// Comment appears after the last child content but before the closing tag
60    LastChild,
61    /// Comment appears after the element's closing tag
62    After,
63    /// Comment appears inline with the element (for text-only elements)
64    Inline,
65}
66
/// Enhanced comment structure with position and location metadata.
///
/// Only `content`, `position` and `preserve_formatting` influence the text
/// produced by `Comment::to_xml`; the remaining fields are carried along as
/// metadata.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Comment {
    /// The comment content (without <!-- --> markers)
    pub content: String,

    /// Position relative to the parent element
    pub position: CommentPosition,

    /// XPath-like location reference for precise positioning
    pub xpath: Option<String>,

    /// Line number in original XML (for debugging/tooling)
    pub line_number: Option<usize>,

    /// Column number in original XML (for debugging/tooling)
    pub column_number: Option<usize>,

    /// Whether this comment should be preserved during canonicalization.
    ///
    /// When `true`, `canonical_content` and `to_xml` use `content` verbatim
    /// instead of trimming/normalizing it.
    pub preserve_formatting: bool,

    /// Processing hints for specific output formats (key -> value, in
    /// insertion order)
    pub processing_hints: IndexMap<String, String>,
}
91
/// XML Processing Instruction, i.e. `<?target data?>`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ProcessingInstruction {
    /// The target of the processing instruction (the name right after `<?`)
    pub target: String,

    /// The data content of the processing instruction; `None` when the
    /// instruction consists of the target only
    pub data: Option<String>,
}
101
/// Extension container with location-aware storage.
///
/// Groups everything that has to survive a round trip but is not modelled by
/// the typed DDEX structures: unknown fragments, document-level namespaces,
/// processing instructions, comments, and free-form legacy key/value data.
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct Extensions {
    /// Extensions organized by their location in the DDEX structure.
    ///
    /// Keyed by a location string (see `utils::generate_location_key`);
    /// inserting under an existing key replaces the previous fragment.
    pub fragments: IndexMap<String, XmlFragment>,

    /// Global namespace declarations that should be preserved at document level
    pub global_namespaces: IndexMap<String, String>, // prefix -> uri

    /// Document-level processing instructions
    pub document_processing_instructions: Vec<ProcessingInstruction>,

    /// Document-level comments
    pub document_comments: Vec<Comment>,

    /// Legacy simple extensions (for backward compatibility).
    ///
    /// Marked `#[serde(flatten)]`, so these entries are (de)serialized as
    /// sibling keys of the fields above rather than under a `legacy_data` key.
    #[serde(flatten)]
    pub legacy_data: HashMap<String, serde_json::Value>,
}
121
122impl XmlFragment {
123    /// Create a new XML fragment
124    pub fn new(element_name: String, raw_content: String) -> Self {
125        Self {
126            raw_content,
127            element_name,
128            namespace_uri: None,
129            namespace_prefix: None,
130            namespace_declarations: IndexMap::new(),
131            attributes: IndexMap::new(),
132            children: Vec::new(),
133            text_content: None,
134            processing_instructions: Vec::new(),
135            comments: Vec::new(),
136            position_hint: None,
137            preserve_formatting: false,
138        }
139    }
140    
141    /// Create a fragment with namespace information
142    pub fn with_namespace(
143        element_name: String,
144        namespace_uri: Option<String>,
145        namespace_prefix: Option<String>,
146        raw_content: String,
147    ) -> Self {
148        Self {
149            raw_content,
150            element_name,
151            namespace_uri,
152            namespace_prefix,
153            namespace_declarations: IndexMap::new(),
154            attributes: IndexMap::new(),
155            children: Vec::new(),
156            text_content: None,
157            processing_instructions: Vec::new(),
158            comments: Vec::new(),
159            position_hint: None,
160            preserve_formatting: false,
161        }
162    }
163    
164    /// Get the qualified name for this element
165    pub fn qualified_name(&self) -> String {
166        if let Some(ref prefix) = self.namespace_prefix {
167            format!("{}:{}", prefix, self.element_name)
168        } else {
169            self.element_name.clone()
170        }
171    }
172    
173    /// Check if this fragment is from a specific namespace
174    pub fn is_from_namespace(&self, namespace_uri: &str) -> bool {
175        self.namespace_uri.as_ref().map_or(false, |uri| uri == namespace_uri)
176    }
177    
178    /// Add a child fragment
179    pub fn add_child(&mut self, child: XmlFragment) {
180        self.children.push(child);
181    }
182    
183    /// Add an attribute
184    pub fn add_attribute(&mut self, name: String, value: String) {
185        self.attributes.insert(name, value);
186    }
187    
188    /// Add a namespace declaration
189    pub fn add_namespace_declaration(&mut self, prefix: String, uri: String) {
190        self.namespace_declarations.insert(prefix, uri);
191    }
192    
193    /// Set position hint for canonical ordering
194    pub fn set_position_hint(&mut self, position: usize) {
195        self.position_hint = Some(position);
196    }
197    
198    /// Mark this fragment to preserve original formatting
199    pub fn preserve_formatting(&mut self) {
200        self.preserve_formatting = true;
201    }
202    
203    /// Get the canonical XML representation with proper formatting
204    pub fn to_canonical_xml(&self, indent_level: usize) -> String {
205        if self.preserve_formatting {
206            return self.raw_content.clone();
207        }
208        
209        let indent = "  ".repeat(indent_level);
210        let mut xml = String::new();
211        
212        // Opening tag
213        xml.push_str(&format!("{}<{}", indent, self.qualified_name()));
214        
215        // Namespace declarations (sorted for determinism)
216        let mut sorted_ns: Vec<_> = self.namespace_declarations.iter().collect();
217        sorted_ns.sort_by_key(|(prefix, _)| prefix.as_str());
218        
219        for (prefix, uri) in sorted_ns {
220            if prefix.is_empty() {
221                xml.push_str(&format!(" xmlns=\"{}\"", uri));
222            } else {
223                xml.push_str(&format!(" xmlns:{}=\"{}\"", prefix, uri));
224            }
225        }
226        
227        // Attributes (sorted for determinism)
228        let mut sorted_attrs: Vec<_> = self.attributes.iter().collect();
229        sorted_attrs.sort_by_key(|(name, _)| name.as_str());
230        
231        for (name, value) in sorted_attrs {
232            xml.push_str(&format!(" {}=\"{}\"", name, html_escape::encode_double_quoted_attribute(value)));
233        }
234        
235        if let Some(ref text) = self.text_content {
236            // Element with text content
237            xml.push('>');
238            xml.push_str(&html_escape::encode_text(text));
239            xml.push_str(&format!("</{}>", self.qualified_name()));
240        } else if self.children.is_empty() && self.processing_instructions.is_empty() && self.comments.is_empty() {
241            // Self-closing element
242            xml.push_str("/>");
243        } else {
244            // Element with children
245            xml.push_str(">\n");
246            
247            // Processing instructions
248            for pi in &self.processing_instructions {
249                xml.push_str(&format!("{}  <?{}", indent, pi.target));
250                if let Some(ref data) = pi.data {
251                    xml.push(' ');
252                    xml.push_str(data);
253                }
254                xml.push_str("?>\n");
255            }
256            
257            // Comments
258            for comment in &self.comments {
259                let comment_indent = match comment.position {
260                    CommentPosition::Before | CommentPosition::After => indent.clone(),
261                    CommentPosition::FirstChild | CommentPosition::LastChild => format!("{}  ", indent),
262                    CommentPosition::Inline => String::new(),
263                };
264                xml.push_str(&format!("{}{}\n", comment_indent, comment.to_xml()));
265            }
266            
267            // Child elements
268            for child in &self.children {
269                xml.push_str(&child.to_canonical_xml(indent_level + 1));
270                xml.push('\n');
271            }
272            
273            xml.push_str(&format!("{}</{}>", indent, self.qualified_name()));
274        }
275        
276        xml
277    }
278}
279
280impl Comment {
281    /// Create a new comment with minimal information
282    pub fn new(content: String, position: CommentPosition) -> Self {
283        Self {
284            content,
285            position,
286            xpath: None,
287            line_number: None,
288            column_number: None,
289            preserve_formatting: false,
290            processing_hints: IndexMap::new(),
291        }
292    }
293    
294    /// Create a comment with location metadata
295    pub fn with_location(
296        content: String, 
297        position: CommentPosition, 
298        xpath: Option<String>,
299        line_number: Option<usize>,
300        column_number: Option<usize>
301    ) -> Self {
302        Self {
303            content,
304            position,
305            xpath,
306            line_number,
307            column_number,
308            preserve_formatting: false,
309            processing_hints: IndexMap::new(),
310        }
311    }
312    
313    /// Create a comment for document-level usage
314    pub fn document_comment(content: String) -> Self {
315        Self::new(content, CommentPosition::Before)
316    }
317    
318    /// Set preservation of original formatting
319    pub fn preserve_formatting(mut self) -> Self {
320        self.preserve_formatting = true;
321        self
322    }
323    
324    /// Add a processing hint
325    pub fn with_hint(mut self, key: String, value: String) -> Self {
326        self.processing_hints.insert(key, value);
327        self
328    }
329    
330    /// Get canonical comment content with proper whitespace normalization
331    pub fn canonical_content(&self) -> String {
332        if self.preserve_formatting {
333            return self.content.clone();
334        }
335        
336        // Normalize whitespace for canonical output
337        self.content.trim().to_string()
338    }
339    
340    /// Format as XML comment with proper escaping
341    pub fn to_xml(&self) -> String {
342        let content = if self.preserve_formatting {
343            self.content.clone()
344        } else {
345            // Normalize whitespace and ensure no double dashes
346            self.content.trim()
347                .replace("--", "- -")
348                .replace("<!--", "&lt;!--")
349                .replace("-->", "--&gt;")
350        };
351        
352        format!("<!--{}-->", content)
353    }
354}
355
356impl ProcessingInstruction {
357    /// Create a new processing instruction
358    pub fn new(target: String, data: Option<String>) -> Self {
359        Self { target, data }
360    }
361}
362
363impl Extensions {
364    /// Create a new extensions container
365    pub fn new() -> Self {
366        Self {
367            fragments: IndexMap::new(),
368            global_namespaces: IndexMap::new(),
369            document_processing_instructions: Vec::new(),
370            document_comments: Vec::new(),
371            legacy_data: HashMap::new(),
372        }
373    }
374    
375    /// Add an XML fragment at a specific location
376    pub fn add_fragment(&mut self, location: String, fragment: XmlFragment) {
377        self.fragments.insert(location, fragment);
378    }
379    
380    /// Get a fragment by location
381    pub fn get_fragment(&self, location: &str) -> Option<&XmlFragment> {
382        self.fragments.get(location)
383    }
384    
385    /// Get all fragments for a location pattern
386    pub fn get_fragments_matching(&self, pattern: &str) -> Vec<(&String, &XmlFragment)> {
387        self.fragments
388            .iter()
389            .filter(|(location, _)| location.starts_with(pattern))
390            .collect()
391    }
392    
393    /// Add a global namespace declaration
394    pub fn add_global_namespace(&mut self, prefix: String, uri: String) {
395        self.global_namespaces.insert(prefix, uri);
396    }
397    
398    /// Add a document-level processing instruction
399    pub fn add_document_processing_instruction(&mut self, pi: ProcessingInstruction) {
400        self.document_processing_instructions.push(pi);
401    }
402    
403    /// Add a document-level comment
404    pub fn add_document_comment(&mut self, comment: String) {
405        self.document_comments.push(Comment::document_comment(comment));
406    }
407    
408    /// Add a structured document-level comment
409    pub fn add_document_comment_structured(&mut self, comment: Comment) {
410        self.document_comments.push(comment);
411    }
412    
413    /// Legacy method for backward compatibility
414    pub fn insert(&mut self, key: String, value: serde_json::Value) {
415        self.legacy_data.insert(key, value);
416    }
417    
418    /// Legacy method for backward compatibility
419    pub fn get(&self, key: &str) -> Option<&serde_json::Value> {
420        self.legacy_data.get(key)
421    }
422    
423    /// Check if there are any extensions
424    pub fn is_empty(&self) -> bool {
425        self.fragments.is_empty() 
426            && self.global_namespaces.is_empty()
427            && self.document_processing_instructions.is_empty()
428            && self.document_comments.is_empty()
429            && self.legacy_data.is_empty()
430    }
431    
432    /// Get the total number of preserved extensions
433    pub fn count(&self) -> usize {
434        self.fragments.len() 
435            + self.global_namespaces.len()
436            + self.document_processing_instructions.len()
437            + self.document_comments.len()
438            + self.legacy_data.len()
439    }
440    
441    /// Merge another Extensions instance into this one
442    pub fn merge(&mut self, other: Extensions) {
443        for (location, fragment) in other.fragments {
444            self.fragments.insert(location, fragment);
445        }
446        
447        for (prefix, uri) in other.global_namespaces {
448            self.global_namespaces.insert(prefix, uri);
449        }
450        
451        self.document_processing_instructions.extend(other.document_processing_instructions);
452        self.document_comments.extend(other.document_comments);
453        
454        for (key, value) in other.legacy_data {
455            self.legacy_data.insert(key, value);
456        }
457    }
458    
459    /// Clear all extensions
460    pub fn clear(&mut self) {
461        self.fragments.clear();
462        self.global_namespaces.clear();
463        self.document_processing_instructions.clear();
464        self.document_comments.clear();
465        self.legacy_data.clear();
466    }
467}
468
469/// Helper functions for extension management
470pub mod utils {
471    use super::*;
472    
473    /// Generate a location key for an extension
474    /// Format: "element_path/namespace_uri/element_name"
475    pub fn generate_location_key(
476        element_path: &[&str],
477        namespace_uri: Option<&str>,
478        element_name: &str,
479    ) -> String {
480        let path = element_path.join("/");
481        match namespace_uri {
482            Some(ns) => format!("{}/{}/{}", path, ns, element_name),
483            None => format!("{}/{}", path, element_name),
484        }
485    }
486    
487    /// Check if a namespace URI is a known DDEX namespace
488    pub fn is_ddex_namespace(namespace_uri: &str) -> bool {
489        const DDEX_NAMESPACES: &[&str] = &[
490            "http://ddex.net/xml/ern/382",
491            "http://ddex.net/xml/ern/42",
492            "http://ddex.net/xml/ern/43",
493            "http://ddex.net/xml/avs",
494            "http://www.w3.org/2001/XMLSchema-instance",
495        ];
496        
497        DDEX_NAMESPACES.contains(&namespace_uri)
498    }
499    
500    /// Extract namespace prefix from a qualified name
501    pub fn extract_namespace_prefix(qualified_name: &str) -> Option<&str> {
502        qualified_name.split(':').next().filter(|prefix| !prefix.is_empty())
503    }
504    
505    /// Extract local name from a qualified name
506    pub fn extract_local_name(qualified_name: &str) -> &str {
507        qualified_name.split(':').last().unwrap_or(qualified_name)
508    }
509    
510    /// Validate XML fragment content
511    pub fn validate_xml_fragment(fragment: &XmlFragment) -> Result<(), String> {
512        if fragment.element_name.is_empty() {
513            return Err("Element name cannot be empty".to_string());
514        }
515        
516        if fragment.raw_content.is_empty() {
517            return Err("Raw content cannot be empty".to_string());
518        }
519        
520        // Additional validation can be added here
521        Ok(())
522    }
523}
524
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain fragment keeps its name and raw text and has no prefix.
    #[test]
    fn test_xml_fragment_creation() {
        let raw = "<customElement>content</customElement>";
        let plain = XmlFragment::new(String::from("customElement"), String::from(raw));

        assert_eq!(plain.element_name, "customElement");
        assert_eq!(plain.raw_content, raw);
        assert_eq!(plain.qualified_name(), "customElement");
    }

    /// Namespace prefix and URI are reflected by the accessor helpers.
    #[test]
    fn test_xml_fragment_with_namespace() {
        let namespaced = XmlFragment::with_namespace(
            String::from("customElement"),
            Some(String::from("http://example.com/custom")),
            Some(String::from("custom")),
            String::from("<custom:customElement>content</custom:customElement>"),
        );

        assert!(namespaced.is_from_namespace("http://example.com/custom"));
        assert_eq!(namespaced.qualified_name(), "custom:customElement");
    }

    /// Adding one fragment makes the container non-empty with a count of one.
    #[test]
    fn test_extensions_container() {
        let mut container = Extensions::new();
        assert!(container.is_empty());

        container.add_fragment(
            String::from("message/test"),
            XmlFragment::new(String::from("test"), String::from("<test/>")),
        );

        assert!(!container.is_empty());
        assert_eq!(container.count(), 1);
    }

    /// Attributes and text content show up in the canonical rendering.
    #[test]
    fn test_canonical_xml_generation() {
        let mut element = XmlFragment::new(
            String::from("customElement"),
            String::from("<customElement attr=\"value\">text</customElement>"),
        );
        element.text_content = Some(String::from("text"));
        element.add_attribute(String::from("attr"), String::from("value"));

        let rendered = element.to_canonical_xml(0);
        assert!(rendered.contains("<customElement attr=\"value\">text</customElement>"));
    }

    /// Path, namespace and element name are joined with slashes.
    #[test]
    fn test_location_key_generation() {
        let path = ["message", "header"];
        let generated =
            utils::generate_location_key(&path, Some("http://example.com/ns"), "customElement");

        assert_eq!(generated, "message/header/http://example.com/ns/customElement");
    }

    /// Known DDEX namespaces are recognised, foreign ones are not.
    #[test]
    fn test_ddex_namespace_detection() {
        for known in ["http://ddex.net/xml/ern/43", "http://ddex.net/xml/avs"].iter() {
            assert!(utils::is_ddex_namespace(known));
        }
        assert!(!utils::is_ddex_namespace("http://example.com/custom"));
    }
}
601
602#[cfg(test)]
603pub mod test_data;
604
605#[cfg(test)]
606mod comprehensive_tests;