ddex-core 0.4.5

Shared DDEX data models and utilities
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
//! Extension preservation system for perfect round-trip fidelity
//!
//! This module provides comprehensive XML fragment preservation to maintain
//! unknown elements, namespaces, and attributes that are not part of the
//! DDEX schema. This ensures that proprietary extensions from music companies
//! or custom implementations are preserved during parse → modify → build cycles.

use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Comprehensive XML fragment preservation for round-trip fidelity
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct XmlFragment {
    /// The raw XML content as a string
    pub raw_content: String,

    /// Element name (local name without namespace prefix)
    pub element_name: String,

    /// Namespace URI for this element
    pub namespace_uri: Option<String>,

    /// Namespace prefix used in the original XML
    pub namespace_prefix: Option<String>,

    /// All namespace declarations on this element
    pub namespace_declarations: IndexMap<String, String>, // prefix -> uri

    /// All attributes on this element (including namespaced ones)
    pub attributes: IndexMap<String, String>, // qualified name -> value

    /// Child XML fragments (for nested unknown elements)
    pub children: Vec<XmlFragment>,

    /// Text content (if this element contains only text)
    pub text_content: Option<String>,

    /// Processing instructions within this fragment
    pub processing_instructions: Vec<ProcessingInstruction>,

    /// Comments within this fragment
    pub comments: Vec<Comment>,

    /// Position hint for canonical ordering
    pub position_hint: Option<usize>,

    /// Whether this fragment should be preserved as-is (no canonicalization)
    pub preserve_formatting: bool,
}

/// Position of a comment relative to its parent element
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub enum CommentPosition {
    /// Comment appears before the element's opening tag
    Before,
    /// Comment appears after the element's opening tag but before any child content
    FirstChild,
    /// Comment appears after the last child content but before the closing tag
    LastChild,
    /// Comment appears after the element's closing tag
    After,
    /// Comment appears inline with the element (for text-only elements)
    Inline,
}

/// Enhanced comment structure with position and location metadata
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Comment {
    /// The comment content (without <!-- --> markers)
    pub content: String,

    /// Position relative to the parent element
    pub position: CommentPosition,

    /// XPath-like location reference for precise positioning
    pub xpath: Option<String>,

    /// Line number in original XML (for debugging/tooling)
    pub line_number: Option<usize>,

    /// Column number in original XML (for debugging/tooling)
    pub column_number: Option<usize>,

    /// Whether this comment should be preserved during canonicalization
    pub preserve_formatting: bool,

    /// Processing hints for specific output formats
    pub processing_hints: IndexMap<String, String>,
}

/// XML Processing Instruction
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ProcessingInstruction {
    /// The target of the processing instruction
    pub target: String,

    /// The data content of the processing instruction
    pub data: Option<String>,
}

/// Extension container with location-aware storage
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct Extensions {
    /// Extensions organized by their location in the DDEX structure
    pub fragments: IndexMap<String, XmlFragment>,

    /// Global namespace declarations that should be preserved at document level
    pub global_namespaces: IndexMap<String, String>, // prefix -> uri

    /// Document-level processing instructions
    pub document_processing_instructions: Vec<ProcessingInstruction>,

    /// Document-level comments
    pub document_comments: Vec<Comment>,

    /// Legacy simple extensions (for backward compatibility)
    #[serde(flatten)]
    pub legacy_data: HashMap<String, serde_json::Value>,
}

impl XmlFragment {
    /// Create a new XML fragment
    pub fn new(element_name: String, raw_content: String) -> Self {
        Self {
            raw_content,
            element_name,
            namespace_uri: None,
            namespace_prefix: None,
            namespace_declarations: IndexMap::new(),
            attributes: IndexMap::new(),
            children: Vec::new(),
            text_content: None,
            processing_instructions: Vec::new(),
            comments: Vec::new(),
            position_hint: None,
            preserve_formatting: false,
        }
    }

    /// Create a fragment with namespace information
    pub fn with_namespace(
        element_name: String,
        namespace_uri: Option<String>,
        namespace_prefix: Option<String>,
        raw_content: String,
    ) -> Self {
        Self {
            raw_content,
            element_name,
            namespace_uri,
            namespace_prefix,
            namespace_declarations: IndexMap::new(),
            attributes: IndexMap::new(),
            children: Vec::new(),
            text_content: None,
            processing_instructions: Vec::new(),
            comments: Vec::new(),
            position_hint: None,
            preserve_formatting: false,
        }
    }

    /// Get the qualified name for this element
    pub fn qualified_name(&self) -> String {
        if let Some(ref prefix) = self.namespace_prefix {
            format!("{}:{}", prefix, self.element_name)
        } else {
            self.element_name.clone()
        }
    }

    /// Check if this fragment is from a specific namespace
    pub fn is_from_namespace(&self, namespace_uri: &str) -> bool {
        self.namespace_uri
            .as_ref()
            .is_some_and(|uri| uri == namespace_uri)
    }

    /// Add a child fragment
    pub fn add_child(&mut self, child: XmlFragment) {
        self.children.push(child);
    }

    /// Add an attribute
    pub fn add_attribute(&mut self, name: String, value: String) {
        self.attributes.insert(name, value);
    }

    /// Add a namespace declaration
    pub fn add_namespace_declaration(&mut self, prefix: String, uri: String) {
        self.namespace_declarations.insert(prefix, uri);
    }

    /// Set position hint for canonical ordering
    pub fn set_position_hint(&mut self, position: usize) {
        self.position_hint = Some(position);
    }

    /// Mark this fragment to preserve original formatting
    pub fn preserve_formatting(&mut self) {
        self.preserve_formatting = true;
    }

    /// Get the canonical XML representation with proper formatting
    pub fn to_canonical_xml(&self, indent_level: usize) -> String {
        if self.preserve_formatting {
            return self.raw_content.clone();
        }

        let indent = "  ".repeat(indent_level);
        let mut xml = String::new();

        // Opening tag
        xml.push_str(&format!("{}<{}", indent, self.qualified_name()));

        // Namespace declarations (sorted for determinism)
        let mut sorted_ns: Vec<_> = self.namespace_declarations.iter().collect();
        sorted_ns.sort_by_key(|(prefix, _)| prefix.as_str());

        for (prefix, uri) in sorted_ns {
            if prefix.is_empty() {
                xml.push_str(&format!(" xmlns=\"{}\"", uri));
            } else {
                xml.push_str(&format!(" xmlns:{}=\"{}\"", prefix, uri));
            }
        }

        // Attributes (sorted for determinism)
        let mut sorted_attrs: Vec<_> = self.attributes.iter().collect();
        sorted_attrs.sort_by_key(|(name, _)| name.as_str());

        for (name, value) in sorted_attrs {
            xml.push_str(&format!(
                " {}=\"{}\"",
                name,
                html_escape::encode_double_quoted_attribute(value)
            ));
        }

        if let Some(ref text) = self.text_content {
            // Element with text content
            xml.push('>');
            xml.push_str(&html_escape::encode_text(text));
            xml.push_str(&format!("</{}>", self.qualified_name()));
        } else if self.children.is_empty()
            && self.processing_instructions.is_empty()
            && self.comments.is_empty()
        {
            // Self-closing element
            xml.push_str("/>");
        } else {
            // Element with children
            xml.push_str(">\n");

            // Processing instructions
            for pi in &self.processing_instructions {
                xml.push_str(&format!("{}  <?{}", indent, pi.target));
                if let Some(ref data) = pi.data {
                    xml.push(' ');
                    xml.push_str(data);
                }
                xml.push_str("?>\n");
            }

            // Comments
            for comment in &self.comments {
                let comment_indent = match comment.position {
                    CommentPosition::Before | CommentPosition::After => indent.clone(),
                    CommentPosition::FirstChild | CommentPosition::LastChild => {
                        format!("{}  ", indent)
                    }
                    CommentPosition::Inline => String::new(),
                };
                xml.push_str(&format!("{}{}\n", comment_indent, comment.to_xml()));
            }

            // Child elements
            for child in &self.children {
                xml.push_str(&child.to_canonical_xml(indent_level + 1));
                xml.push('\n');
            }

            xml.push_str(&format!("{}</{}>", indent, self.qualified_name()));
        }

        xml
    }
}

impl Comment {
    /// Create a new comment with minimal information
    pub fn new(content: String, position: CommentPosition) -> Self {
        Self {
            content,
            position,
            xpath: None,
            line_number: None,
            column_number: None,
            preserve_formatting: false,
            processing_hints: IndexMap::new(),
        }
    }

    /// Create a comment with location metadata
    pub fn with_location(
        content: String,
        position: CommentPosition,
        xpath: Option<String>,
        line_number: Option<usize>,
        column_number: Option<usize>,
    ) -> Self {
        Self {
            content,
            position,
            xpath,
            line_number,
            column_number,
            preserve_formatting: false,
            processing_hints: IndexMap::new(),
        }
    }

    /// Create a comment for document-level usage
    pub fn document_comment(content: String) -> Self {
        Self::new(content, CommentPosition::Before)
    }

    /// Set preservation of original formatting
    pub fn preserve_formatting(mut self) -> Self {
        self.preserve_formatting = true;
        self
    }

    /// Add a processing hint
    pub fn with_hint(mut self, key: String, value: String) -> Self {
        self.processing_hints.insert(key, value);
        self
    }

    /// Get canonical comment content with proper whitespace normalization
    pub fn canonical_content(&self) -> String {
        if self.preserve_formatting {
            return self.content.clone();
        }

        // Normalize whitespace for canonical output
        self.content.trim().to_string()
    }

    /// Format as XML comment with proper escaping
    pub fn to_xml(&self) -> String {
        let content = if self.preserve_formatting {
            self.content.clone()
        } else {
            // Normalize whitespace and ensure no double dashes
            self.content
                .trim()
                .replace("--", "- -")
                .replace("<!--", "&lt;!--")
                .replace("-->", "--&gt;")
        };

        format!("<!--{}-->", content)
    }
}

impl ProcessingInstruction {
    /// Create a new processing instruction
    pub fn new(target: String, data: Option<String>) -> Self {
        Self { target, data }
    }
}

impl Extensions {
    /// Create a new extensions container
    pub fn new() -> Self {
        Self {
            fragments: IndexMap::new(),
            global_namespaces: IndexMap::new(),
            document_processing_instructions: Vec::new(),
            document_comments: Vec::new(),
            legacy_data: HashMap::new(),
        }
    }

    /// Add an XML fragment at a specific location
    pub fn add_fragment(&mut self, location: String, fragment: XmlFragment) {
        self.fragments.insert(location, fragment);
    }

    /// Get a fragment by location
    pub fn get_fragment(&self, location: &str) -> Option<&XmlFragment> {
        self.fragments.get(location)
    }

    /// Get all fragments for a location pattern
    pub fn get_fragments_matching(&self, pattern: &str) -> Vec<(&String, &XmlFragment)> {
        self.fragments
            .iter()
            .filter(|(location, _)| location.starts_with(pattern))
            .collect()
    }

    /// Add a global namespace declaration
    pub fn add_global_namespace(&mut self, prefix: String, uri: String) {
        self.global_namespaces.insert(prefix, uri);
    }

    /// Add a document-level processing instruction
    pub fn add_document_processing_instruction(&mut self, pi: ProcessingInstruction) {
        self.document_processing_instructions.push(pi);
    }

    /// Add a document-level comment
    pub fn add_document_comment(&mut self, comment: String) {
        self.document_comments
            .push(Comment::document_comment(comment));
    }

    /// Add a structured document-level comment
    pub fn add_document_comment_structured(&mut self, comment: Comment) {
        self.document_comments.push(comment);
    }

    /// Legacy method for backward compatibility
    pub fn insert(&mut self, key: String, value: serde_json::Value) {
        self.legacy_data.insert(key, value);
    }

    /// Legacy method for backward compatibility
    pub fn get(&self, key: &str) -> Option<&serde_json::Value> {
        self.legacy_data.get(key)
    }

    /// Check if there are any extensions
    pub fn is_empty(&self) -> bool {
        self.fragments.is_empty()
            && self.global_namespaces.is_empty()
            && self.document_processing_instructions.is_empty()
            && self.document_comments.is_empty()
            && self.legacy_data.is_empty()
    }

    /// Get the total number of preserved extensions
    pub fn count(&self) -> usize {
        self.fragments.len()
            + self.global_namespaces.len()
            + self.document_processing_instructions.len()
            + self.document_comments.len()
            + self.legacy_data.len()
    }

    /// Merge another Extensions instance into this one
    pub fn merge(&mut self, other: Extensions) {
        for (location, fragment) in other.fragments {
            self.fragments.insert(location, fragment);
        }

        for (prefix, uri) in other.global_namespaces {
            self.global_namespaces.insert(prefix, uri);
        }

        self.document_processing_instructions
            .extend(other.document_processing_instructions);
        self.document_comments.extend(other.document_comments);

        for (key, value) in other.legacy_data {
            self.legacy_data.insert(key, value);
        }
    }

    /// Clear all extensions
    pub fn clear(&mut self) {
        self.fragments.clear();
        self.global_namespaces.clear();
        self.document_processing_instructions.clear();
        self.document_comments.clear();
        self.legacy_data.clear();
    }
}

/// Helper functions for extension management
pub mod utils {
    use super::*;

    /// Generate a location key for an extension
    /// Format: "element_path/namespace_uri/element_name"
    pub fn generate_location_key(
        element_path: &[&str],
        namespace_uri: Option<&str>,
        element_name: &str,
    ) -> String {
        let path = element_path.join("/");
        match namespace_uri {
            Some(ns) => format!("{}/{}/{}", path, ns, element_name),
            None => format!("{}/{}", path, element_name),
        }
    }

    /// Check if a namespace URI is a known DDEX namespace
    pub fn is_ddex_namespace(namespace_uri: &str) -> bool {
        const DDEX_NAMESPACES: &[&str] = &[
            "http://ddex.net/xml/ern/382",
            "http://ddex.net/xml/ern/42",
            "http://ddex.net/xml/ern/43",
            "http://ddex.net/xml/avs",
            "http://www.w3.org/2001/XMLSchema-instance",
        ];

        DDEX_NAMESPACES.contains(&namespace_uri)
    }

    /// Extract namespace prefix from a qualified name
    pub fn extract_namespace_prefix(qualified_name: &str) -> Option<&str> {
        qualified_name
            .split(':')
            .next()
            .filter(|prefix| !prefix.is_empty())
    }

    /// Extract local name from a qualified name
    pub fn extract_local_name(qualified_name: &str) -> &str {
        qualified_name
            .split(':')
            .next_back()
            .unwrap_or(qualified_name)
    }

    /// Validate XML fragment content
    pub fn validate_xml_fragment(fragment: &XmlFragment) -> Result<(), String> {
        if fragment.element_name.is_empty() {
            return Err("Element name cannot be empty".to_string());
        }

        if fragment.raw_content.is_empty() {
            return Err("Raw content cannot be empty".to_string());
        }

        // Additional validation can be added here
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_xml_fragment_creation() {
        let fragment = XmlFragment::new(
            "customElement".to_string(),
            "<customElement>content</customElement>".to_string(),
        );

        assert_eq!(fragment.element_name, "customElement");
        assert_eq!(
            fragment.raw_content,
            "<customElement>content</customElement>"
        );
        assert_eq!(fragment.qualified_name(), "customElement");
    }

    #[test]
    fn test_xml_fragment_with_namespace() {
        let fragment = XmlFragment::with_namespace(
            "customElement".to_string(),
            Some("http://example.com/custom".to_string()),
            Some("custom".to_string()),
            "<custom:customElement>content</custom:customElement>".to_string(),
        );

        assert_eq!(fragment.qualified_name(), "custom:customElement");
        assert!(fragment.is_from_namespace("http://example.com/custom"));
    }

    #[test]
    fn test_extensions_container() {
        let mut extensions = Extensions::new();
        assert!(extensions.is_empty());

        let fragment = XmlFragment::new("test".to_string(), "<test/>".to_string());

        extensions.add_fragment("message/test".to_string(), fragment);
        assert!(!extensions.is_empty());
        assert_eq!(extensions.count(), 1);
    }

    #[test]
    fn test_canonical_xml_generation() {
        let mut fragment = XmlFragment::new(
            "customElement".to_string(),
            "<customElement attr=\"value\">text</customElement>".to_string(),
        );

        fragment.add_attribute("attr".to_string(), "value".to_string());
        fragment.text_content = Some("text".to_string());

        let xml = fragment.to_canonical_xml(0);
        assert!(xml.contains("<customElement attr=\"value\">text</customElement>"));
    }

    #[test]
    fn test_location_key_generation() {
        let key = utils::generate_location_key(
            &["message", "header"],
            Some("http://example.com/ns"),
            "customElement",
        );

        assert_eq!(key, "message/header/http://example.com/ns/customElement");
    }

    #[test]
    fn test_ddex_namespace_detection() {
        assert!(utils::is_ddex_namespace("http://ddex.net/xml/ern/43"));
        assert!(utils::is_ddex_namespace("http://ddex.net/xml/avs"));
        assert!(!utils::is_ddex_namespace("http://example.com/custom"));
    }
}

#[cfg(test)]
pub mod test_data;

#[cfg(test)]
mod comprehensive_tests;