ddex_core/models/
extensions.rs

1//! Extension preservation system for perfect round-trip fidelity
2//!
3//! This module provides comprehensive XML fragment preservation to maintain
4//! unknown elements, namespaces, and attributes that are not part of the
5//! DDEX schema. This ensures that proprietary extensions from music companies
6//! or custom implementations are preserved during parse → modify → build cycles.
7
8use indexmap::IndexMap;
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11
/// A preserved XML element, kept so unknown content survives a round trip.
///
/// The fragment stores the original source text alongside a structured view of
/// the element (name, namespace, attributes, children, comments, processing
/// instructions). `to_canonical_xml` emits either the raw text or a
/// re-serialized form, depending on `preserve_formatting`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct XmlFragment {
    /// The raw XML content as a string. Returned verbatim by
    /// `to_canonical_xml` when `preserve_formatting` is `true`.
    pub raw_content: String,

    /// Element name (local name without namespace prefix)
    pub element_name: String,

    /// Namespace URI for this element, if any
    pub namespace_uri: Option<String>,

    /// Namespace prefix used in the original XML; combined with
    /// `element_name` by `qualified_name`
    pub namespace_prefix: Option<String>,

    /// All namespace declarations on this element. An empty prefix key is
    /// written as a default `xmlns="..."` declaration.
    pub namespace_declarations: IndexMap<String, String>, // prefix -> uri

    /// All attributes on this element (including namespaced ones)
    pub attributes: IndexMap<String, String>, // qualified name -> value

    /// Child XML fragments (for nested unknown elements)
    pub children: Vec<XmlFragment>,

    /// Text content (if this element contains only text). When this is
    /// `Some`, canonical output writes only the text as the element body;
    /// children, comments and processing instructions are not emitted.
    pub text_content: Option<String>,

    /// Processing instructions within this fragment
    pub processing_instructions: Vec<ProcessingInstruction>,

    /// Comments within this fragment
    pub comments: Vec<Comment>,

    /// Position hint for canonical ordering.
    /// NOTE(review): nothing in the visible part of this file reads this
    /// value; presumably it is consumed by the builder — confirm.
    pub position_hint: Option<usize>,

    /// Whether this fragment should be preserved as-is (no canonicalization)
    pub preserve_formatting: bool,
}
51
/// Position of a comment relative to its parent element.
///
/// `XmlFragment::to_canonical_xml` uses this value only to choose the
/// comment's indentation: `Before`/`After` use the element's own indent,
/// `FirstChild`/`LastChild` use one extra level, and `Inline` uses none.
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub enum CommentPosition {
    /// Comment appears before the element's opening tag
    Before,
    /// Comment appears after the element's opening tag but before any child content
    FirstChild,
    /// Comment appears after the last child content but before the closing tag
    LastChild,
    /// Comment appears after the element's closing tag
    After,
    /// Comment appears inline with the element (for text-only elements)
    Inline,
}
66
/// An XML comment together with position and source-location metadata.
///
/// Only `content` and `preserve_formatting` influence the text produced by
/// `to_xml`; the remaining fields are carried along as metadata.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Comment {
    /// The comment content (without <!-- --> markers)
    pub content: String,

    /// Position relative to the parent element
    pub position: CommentPosition,

    /// XPath-like location reference for precise positioning
    pub xpath: Option<String>,

    /// Line number in original XML (for debugging/tooling)
    pub line_number: Option<usize>,

    /// Column number in original XML (for debugging/tooling)
    pub column_number: Option<usize>,

    /// Whether this comment should be preserved during canonicalization.
    /// When `true`, `canonical_content` and `to_xml` use `content` verbatim
    /// instead of trimming it.
    pub preserve_formatting: bool,

    /// Processing hints for specific output formats (free-form key/value
    /// pairs; populated through `with_hint`)
    pub processing_hints: IndexMap<String, String>,
}
91
/// XML Processing Instruction.
///
/// `XmlFragment::to_canonical_xml` renders it as `<?target data?>`, or as
/// `<?target?>` when `data` is `None`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ProcessingInstruction {
    /// The target of the processing instruction
    pub target: String,

    /// The data content of the processing instruction (everything after the
    /// target), if any
    pub data: Option<String>,
}
101
102/// Extension container with location-aware storage
103#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
104pub struct Extensions {
105    /// Extensions organized by their location in the DDEX structure
106    pub fragments: IndexMap<String, XmlFragment>,
107
108    /// Global namespace declarations that should be preserved at document level
109    pub global_namespaces: IndexMap<String, String>, // prefix -> uri
110
111    /// Document-level processing instructions
112    pub document_processing_instructions: Vec<ProcessingInstruction>,
113
114    /// Document-level comments
115    pub document_comments: Vec<Comment>,
116
117    /// Legacy simple extensions (for backward compatibility)
118    #[serde(flatten)]
119    pub legacy_data: HashMap<String, serde_json::Value>,
120}
121
122impl XmlFragment {
123    /// Create a new XML fragment
124    pub fn new(element_name: String, raw_content: String) -> Self {
125        Self {
126            raw_content,
127            element_name,
128            namespace_uri: None,
129            namespace_prefix: None,
130            namespace_declarations: IndexMap::new(),
131            attributes: IndexMap::new(),
132            children: Vec::new(),
133            text_content: None,
134            processing_instructions: Vec::new(),
135            comments: Vec::new(),
136            position_hint: None,
137            preserve_formatting: false,
138        }
139    }
140
141    /// Create a fragment with namespace information
142    pub fn with_namespace(
143        element_name: String,
144        namespace_uri: Option<String>,
145        namespace_prefix: Option<String>,
146        raw_content: String,
147    ) -> Self {
148        Self {
149            raw_content,
150            element_name,
151            namespace_uri,
152            namespace_prefix,
153            namespace_declarations: IndexMap::new(),
154            attributes: IndexMap::new(),
155            children: Vec::new(),
156            text_content: None,
157            processing_instructions: Vec::new(),
158            comments: Vec::new(),
159            position_hint: None,
160            preserve_formatting: false,
161        }
162    }
163
164    /// Get the qualified name for this element
165    pub fn qualified_name(&self) -> String {
166        if let Some(ref prefix) = self.namespace_prefix {
167            format!("{}:{}", prefix, self.element_name)
168        } else {
169            self.element_name.clone()
170        }
171    }
172
173    /// Check if this fragment is from a specific namespace
174    pub fn is_from_namespace(&self, namespace_uri: &str) -> bool {
175        self.namespace_uri
176            .as_ref()
177            .is_some_and(|uri| uri == namespace_uri)
178    }
179
180    /// Add a child fragment
181    pub fn add_child(&mut self, child: XmlFragment) {
182        self.children.push(child);
183    }
184
185    /// Add an attribute
186    pub fn add_attribute(&mut self, name: String, value: String) {
187        self.attributes.insert(name, value);
188    }
189
190    /// Add a namespace declaration
191    pub fn add_namespace_declaration(&mut self, prefix: String, uri: String) {
192        self.namespace_declarations.insert(prefix, uri);
193    }
194
195    /// Set position hint for canonical ordering
196    pub fn set_position_hint(&mut self, position: usize) {
197        self.position_hint = Some(position);
198    }
199
200    /// Mark this fragment to preserve original formatting
201    pub fn preserve_formatting(&mut self) {
202        self.preserve_formatting = true;
203    }
204
205    /// Get the canonical XML representation with proper formatting
206    pub fn to_canonical_xml(&self, indent_level: usize) -> String {
207        if self.preserve_formatting {
208            return self.raw_content.clone();
209        }
210
211        let indent = "  ".repeat(indent_level);
212        let mut xml = String::new();
213
214        // Opening tag
215        xml.push_str(&format!("{}<{}", indent, self.qualified_name()));
216
217        // Namespace declarations (sorted for determinism)
218        let mut sorted_ns: Vec<_> = self.namespace_declarations.iter().collect();
219        sorted_ns.sort_by_key(|(prefix, _)| prefix.as_str());
220
221        for (prefix, uri) in sorted_ns {
222            if prefix.is_empty() {
223                xml.push_str(&format!(" xmlns=\"{}\"", uri));
224            } else {
225                xml.push_str(&format!(" xmlns:{}=\"{}\"", prefix, uri));
226            }
227        }
228
229        // Attributes (sorted for determinism)
230        let mut sorted_attrs: Vec<_> = self.attributes.iter().collect();
231        sorted_attrs.sort_by_key(|(name, _)| name.as_str());
232
233        for (name, value) in sorted_attrs {
234            xml.push_str(&format!(
235                " {}=\"{}\"",
236                name,
237                html_escape::encode_double_quoted_attribute(value)
238            ));
239        }
240
241        if let Some(ref text) = self.text_content {
242            // Element with text content
243            xml.push('>');
244            xml.push_str(&html_escape::encode_text(text));
245            xml.push_str(&format!("</{}>", self.qualified_name()));
246        } else if self.children.is_empty()
247            && self.processing_instructions.is_empty()
248            && self.comments.is_empty()
249        {
250            // Self-closing element
251            xml.push_str("/>");
252        } else {
253            // Element with children
254            xml.push_str(">\n");
255
256            // Processing instructions
257            for pi in &self.processing_instructions {
258                xml.push_str(&format!("{}  <?{}", indent, pi.target));
259                if let Some(ref data) = pi.data {
260                    xml.push(' ');
261                    xml.push_str(data);
262                }
263                xml.push_str("?>\n");
264            }
265
266            // Comments
267            for comment in &self.comments {
268                let comment_indent = match comment.position {
269                    CommentPosition::Before | CommentPosition::After => indent.clone(),
270                    CommentPosition::FirstChild | CommentPosition::LastChild => {
271                        format!("{}  ", indent)
272                    }
273                    CommentPosition::Inline => String::new(),
274                };
275                xml.push_str(&format!("{}{}\n", comment_indent, comment.to_xml()));
276            }
277
278            // Child elements
279            for child in &self.children {
280                xml.push_str(&child.to_canonical_xml(indent_level + 1));
281                xml.push('\n');
282            }
283
284            xml.push_str(&format!("{}</{}>", indent, self.qualified_name()));
285        }
286
287        xml
288    }
289}
290
291impl Comment {
292    /// Create a new comment with minimal information
293    pub fn new(content: String, position: CommentPosition) -> Self {
294        Self {
295            content,
296            position,
297            xpath: None,
298            line_number: None,
299            column_number: None,
300            preserve_formatting: false,
301            processing_hints: IndexMap::new(),
302        }
303    }
304
305    /// Create a comment with location metadata
306    pub fn with_location(
307        content: String,
308        position: CommentPosition,
309        xpath: Option<String>,
310        line_number: Option<usize>,
311        column_number: Option<usize>,
312    ) -> Self {
313        Self {
314            content,
315            position,
316            xpath,
317            line_number,
318            column_number,
319            preserve_formatting: false,
320            processing_hints: IndexMap::new(),
321        }
322    }
323
324    /// Create a comment for document-level usage
325    pub fn document_comment(content: String) -> Self {
326        Self::new(content, CommentPosition::Before)
327    }
328
329    /// Set preservation of original formatting
330    pub fn preserve_formatting(mut self) -> Self {
331        self.preserve_formatting = true;
332        self
333    }
334
335    /// Add a processing hint
336    pub fn with_hint(mut self, key: String, value: String) -> Self {
337        self.processing_hints.insert(key, value);
338        self
339    }
340
341    /// Get canonical comment content with proper whitespace normalization
342    pub fn canonical_content(&self) -> String {
343        if self.preserve_formatting {
344            return self.content.clone();
345        }
346
347        // Normalize whitespace for canonical output
348        self.content.trim().to_string()
349    }
350
351    /// Format as XML comment with proper escaping
352    pub fn to_xml(&self) -> String {
353        let content = if self.preserve_formatting {
354            self.content.clone()
355        } else {
356            // Normalize whitespace and ensure no double dashes
357            self.content
358                .trim()
359                .replace("--", "- -")
360                .replace("<!--", "&lt;!--")
361                .replace("-->", "--&gt;")
362        };
363
364        format!("<!--{}-->", content)
365    }
366}
367
368impl ProcessingInstruction {
369    /// Create a new processing instruction
370    pub fn new(target: String, data: Option<String>) -> Self {
371        Self { target, data }
372    }
373}
374
375impl Extensions {
376    /// Create a new extensions container
377    pub fn new() -> Self {
378        Self {
379            fragments: IndexMap::new(),
380            global_namespaces: IndexMap::new(),
381            document_processing_instructions: Vec::new(),
382            document_comments: Vec::new(),
383            legacy_data: HashMap::new(),
384        }
385    }
386
387    /// Add an XML fragment at a specific location
388    pub fn add_fragment(&mut self, location: String, fragment: XmlFragment) {
389        self.fragments.insert(location, fragment);
390    }
391
392    /// Get a fragment by location
393    pub fn get_fragment(&self, location: &str) -> Option<&XmlFragment> {
394        self.fragments.get(location)
395    }
396
397    /// Get all fragments for a location pattern
398    pub fn get_fragments_matching(&self, pattern: &str) -> Vec<(&String, &XmlFragment)> {
399        self.fragments
400            .iter()
401            .filter(|(location, _)| location.starts_with(pattern))
402            .collect()
403    }
404
405    /// Add a global namespace declaration
406    pub fn add_global_namespace(&mut self, prefix: String, uri: String) {
407        self.global_namespaces.insert(prefix, uri);
408    }
409
410    /// Add a document-level processing instruction
411    pub fn add_document_processing_instruction(&mut self, pi: ProcessingInstruction) {
412        self.document_processing_instructions.push(pi);
413    }
414
415    /// Add a document-level comment
416    pub fn add_document_comment(&mut self, comment: String) {
417        self.document_comments
418            .push(Comment::document_comment(comment));
419    }
420
421    /// Add a structured document-level comment
422    pub fn add_document_comment_structured(&mut self, comment: Comment) {
423        self.document_comments.push(comment);
424    }
425
426    /// Legacy method for backward compatibility
427    pub fn insert(&mut self, key: String, value: serde_json::Value) {
428        self.legacy_data.insert(key, value);
429    }
430
431    /// Legacy method for backward compatibility
432    pub fn get(&self, key: &str) -> Option<&serde_json::Value> {
433        self.legacy_data.get(key)
434    }
435
436    /// Check if there are any extensions
437    pub fn is_empty(&self) -> bool {
438        self.fragments.is_empty()
439            && self.global_namespaces.is_empty()
440            && self.document_processing_instructions.is_empty()
441            && self.document_comments.is_empty()
442            && self.legacy_data.is_empty()
443    }
444
445    /// Get the total number of preserved extensions
446    pub fn count(&self) -> usize {
447        self.fragments.len()
448            + self.global_namespaces.len()
449            + self.document_processing_instructions.len()
450            + self.document_comments.len()
451            + self.legacy_data.len()
452    }
453
454    /// Merge another Extensions instance into this one
455    pub fn merge(&mut self, other: Extensions) {
456        for (location, fragment) in other.fragments {
457            self.fragments.insert(location, fragment);
458        }
459
460        for (prefix, uri) in other.global_namespaces {
461            self.global_namespaces.insert(prefix, uri);
462        }
463
464        self.document_processing_instructions
465            .extend(other.document_processing_instructions);
466        self.document_comments.extend(other.document_comments);
467
468        for (key, value) in other.legacy_data {
469            self.legacy_data.insert(key, value);
470        }
471    }
472
473    /// Clear all extensions
474    pub fn clear(&mut self) {
475        self.fragments.clear();
476        self.global_namespaces.clear();
477        self.document_processing_instructions.clear();
478        self.document_comments.clear();
479        self.legacy_data.clear();
480    }
481}
482
483/// Helper functions for extension management
484pub mod utils {
485    use super::*;
486
487    /// Generate a location key for an extension
488    /// Format: "element_path/namespace_uri/element_name"
489    pub fn generate_location_key(
490        element_path: &[&str],
491        namespace_uri: Option<&str>,
492        element_name: &str,
493    ) -> String {
494        let path = element_path.join("/");
495        match namespace_uri {
496            Some(ns) => format!("{}/{}/{}", path, ns, element_name),
497            None => format!("{}/{}", path, element_name),
498        }
499    }
500
501    /// Check if a namespace URI is a known DDEX namespace
502    pub fn is_ddex_namespace(namespace_uri: &str) -> bool {
503        const DDEX_NAMESPACES: &[&str] = &[
504            "http://ddex.net/xml/ern/382",
505            "http://ddex.net/xml/ern/42",
506            "http://ddex.net/xml/ern/43",
507            "http://ddex.net/xml/avs",
508            "http://www.w3.org/2001/XMLSchema-instance",
509        ];
510
511        DDEX_NAMESPACES.contains(&namespace_uri)
512    }
513
514    /// Extract namespace prefix from a qualified name
515    pub fn extract_namespace_prefix(qualified_name: &str) -> Option<&str> {
516        qualified_name
517            .split(':')
518            .next()
519            .filter(|prefix| !prefix.is_empty())
520    }
521
522    /// Extract local name from a qualified name
523    pub fn extract_local_name(qualified_name: &str) -> &str {
524        qualified_name
525            .split(':')
526            .next_back()
527            .unwrap_or(qualified_name)
528    }
529
530    /// Validate XML fragment content
531    pub fn validate_xml_fragment(fragment: &XmlFragment) -> Result<(), String> {
532        if fragment.element_name.is_empty() {
533            return Err("Element name cannot be empty".to_string());
534        }
535
536        if fragment.raw_content.is_empty() {
537            return Err("Raw content cannot be empty".to_string());
538        }
539
540        // Additional validation can be added here
541        Ok(())
542    }
543}
544
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built fragment exposes the name and raw content it was given.
    #[test]
    fn test_xml_fragment_creation() {
        let raw = "<customElement>content</customElement>";
        let fragment = XmlFragment::new(String::from("customElement"), String::from(raw));

        assert_eq!(fragment.element_name, "customElement");
        assert_eq!(fragment.raw_content, raw);
        assert_eq!(fragment.qualified_name(), "customElement");
    }

    /// The prefix is reflected in the qualified name and the URI is matchable.
    #[test]
    fn test_xml_fragment_with_namespace() {
        let uri = "http://example.com/custom";
        let fragment = XmlFragment::with_namespace(
            String::from("customElement"),
            Some(String::from(uri)),
            Some(String::from("custom")),
            String::from("<custom:customElement>content</custom:customElement>"),
        );

        assert_eq!(fragment.qualified_name(), "custom:customElement");
        assert!(fragment.is_from_namespace(uri));
    }

    /// Adding one fragment makes the container non-empty with a count of one.
    #[test]
    fn test_extensions_container() {
        let mut extensions = Extensions::new();
        assert!(extensions.is_empty());

        extensions.add_fragment(
            String::from("message/test"),
            XmlFragment::new(String::from("test"), String::from("<test/>")),
        );

        assert!(!extensions.is_empty());
        assert_eq!(extensions.count(), 1);
    }

    /// Canonical output contains the attribute and the text content.
    #[test]
    fn test_canonical_xml_generation() {
        let expected = "<customElement attr=\"value\">text</customElement>";

        let mut fragment = XmlFragment::new(String::from("customElement"), String::from(expected));
        fragment.add_attribute(String::from("attr"), String::from("value"));
        fragment.text_content = Some(String::from("text"));

        let xml = fragment.to_canonical_xml(0);
        assert!(xml.contains(expected));
    }

    /// Path, namespace and element name are joined with slashes.
    #[test]
    fn test_location_key_generation() {
        let path = ["message", "header"];
        let key = utils::generate_location_key(&path, Some("http://example.com/ns"), "customElement");

        assert_eq!(key, "message/header/http://example.com/ns/customElement");
    }

    /// Known DDEX namespaces are recognized; arbitrary ones are not.
    #[test]
    fn test_ddex_namespace_detection() {
        for known in ["http://ddex.net/xml/ern/43", "http://ddex.net/xml/avs"] {
            assert!(utils::is_ddex_namespace(known));
        }
        assert!(!utils::is_ddex_namespace("http://example.com/custom"));
    }
}
621
622#[cfg(test)]
623pub mod test_data;
624
625#[cfg(test)]
626mod comprehensive_tests;