ddex_builder/generator/
optimized_xml_writer.rs

//! High-performance XML writer optimized for the DDEX Builder.
//!
//! This writer uses string interning, buffer pooling, and vectorized (batched,
//! optionally parallel) operations to reach a target of under 10 ms for typical albums.
5
6use crate::ast::{Element, Node, AST};
7use crate::determinism::{DeterminismConfig, IndentChar};
8use crate::error::BuildError;
9use crate::optimized_strings::{buffer_sizes, BuildContext, OptimizedString};
10use indexmap::IndexMap;
11
12/// High-performance XML writer with optimizations
13pub struct OptimizedXmlWriter<'a> {
14    config: DeterminismConfig,
15    context: &'a mut BuildContext,
16}
17
18impl<'a> OptimizedXmlWriter<'a> {
19    /// Create a new optimized XML writer
20    pub fn new(config: DeterminismConfig, context: &'a mut BuildContext) -> Self {
21        Self { config, context }
22    }
23
24    /// Write AST to XML string with performance optimizations
25    pub fn write(&mut self, ast: &AST) -> Result<String, BuildError> {
26        // Pre-calculate estimated size based on AST complexity
27        let estimated_size = self.estimate_output_size(ast);
28
29        // Get pre-sized buffer from pool
30        let mut buffer = self.context.get_xml_buffer(estimated_size);
31
32        // Write XML declaration (static strings for performance)
33        buffer.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
34
35        // Write root element with namespaces
36        self.write_element_optimized(
37            &mut buffer,
38            &ast.root,
39            &ast.namespaces,
40            ast.schema_location.as_deref(),
41            0,
42        )?;
43
44        // Return buffer to pool for reuse
45        let result = buffer.clone();
46        self.context.return_xml_buffer(buffer);
47
48        Ok(result)
49    }
50
51    /// Estimate output size to pre-allocate buffers efficiently
52    fn estimate_output_size(&self, ast: &AST) -> usize {
53        let _element_count = self.count_elements(&ast.root);
54
55        // Use our buffer size estimation
56        let track_count = self.estimate_track_count(&ast.root);
57        buffer_sizes::estimated_xml_size(track_count)
58    }
59
60    /// Count total elements in AST for size estimation
61    fn count_elements(&self, element: &Element) -> usize {
62        1 + element
63            .children
64            .iter()
65            .map(|child| match child {
66                Node::Element(elem) => self.count_elements(elem),
67                _ => 0,
68            })
69            .sum::<usize>()
70    }
71
72    /// Estimate track count for buffer sizing
73    fn estimate_track_count(&self, element: &Element) -> usize {
74        // Look for SoundRecording elements as proxy for track count
75        self.count_sound_recordings(element)
76    }
77
78    /// Count SoundRecording elements
79    fn count_sound_recordings(&self, element: &Element) -> usize {
80        let mut count = 0;
81
82        if element.name == "SoundRecording" {
83            count += 1;
84        }
85
86        for child in &element.children {
87            if let Node::Element(child_elem) = child {
88                count += self.count_sound_recordings(child_elem);
89            }
90        }
91
92        count.max(1) // At least 1 for sizing
93    }
94
95    /// Optimized element writing with string interning and fast paths
96    fn write_element_optimized(
97        &mut self,
98        writer: &mut String,
99        element: &Element,
100        namespaces: &IndexMap<String, String>,
101        schema_location: Option<&str>,
102        depth: usize,
103    ) -> Result<(), BuildError> {
104        // Pre-calculate indent (cache common depths)
105        let indent = self.get_optimized_indent(depth);
106
107        // Start tag with capacity hint
108        writer.reserve(128); // Common element size
109        writer.push_str(&indent);
110        writer.push('<');
111
112        // Optimize element name with interning
113        let element_name = self.optimize_element_name(element, namespaces, depth);
114        writer.push_str(element_name.as_str());
115
116        // Add namespace declarations on root element
117        if depth == 0 {
118            for (prefix, uri) in namespaces {
119                writer.push_str(" xmlns:");
120                writer.push_str(prefix);
121                writer.push_str("=\"");
122                writer.push_str(uri);
123                writer.push('"');
124            }
125
126            if let Some(location) = schema_location {
127                writer.push_str(" xsi:schemaLocation=\"");
128                writer.push_str(location);
129                writer.push('"');
130            }
131        }
132
133        // Add attributes (in deterministic order)
134        for (key, value) in &element.attributes {
135            writer.push(' ');
136            writer.push_str(key);
137            writer.push_str("=\"");
138            // Use optimized escaping
139            self.escape_attribute_into(value, writer);
140            writer.push('"');
141        }
142
143        // Handle children with fast paths
144        if element.children.is_empty() {
145            writer.push_str("/>\n");
146        } else {
147            // Check for common patterns
148            let only_text =
149                element.children.len() == 1 && matches!(&element.children[0], Node::Text(_));
150
151            if only_text {
152                // Inline text content (most common case)
153                writer.push('>');
154                if let Node::Text(text) = &element.children[0] {
155                    self.escape_text_into(text, writer);
156                }
157                writer.push_str("</");
158                writer.push_str(element_name.as_str());
159                writer.push_str(">\n");
160            } else {
161                // Has child elements
162                writer.push_str(">\n");
163
164                // Write children with batch operations when possible
165                for child in &element.children {
166                    match child {
167                        Node::Element(child_elem) => {
168                            self.write_element_optimized(
169                                writer,
170                                child_elem,
171                                namespaces,
172                                None,
173                                depth + 1,
174                            )?;
175                        }
176                        Node::Text(text) => {
177                            writer.push_str(&self.get_optimized_indent(depth + 1));
178                            self.escape_text_into(text, writer);
179                            writer.push('\n');
180                        }
181                        Node::Comment(comment) => {
182                            writer.push_str(&self.get_optimized_indent(depth + 1));
183                            let comment_xml = comment.to_xml();
184                            writer.push_str(&comment_xml);
185                            writer.push_str("\n");
186                        }
187                        Node::SimpleComment(comment) => {
188                            writer.push_str(&self.get_optimized_indent(depth + 1));
189                            writer.push_str("<!-- ");
190                            writer.push_str(comment);
191                            writer.push_str(" -->\n");
192                        }
193                    }
194                }
195
196                // Close tag
197                writer.push_str(&indent);
198                writer.push_str("</");
199                writer.push_str(element_name.as_str());
200                writer.push_str(">\n");
201            }
202        }
203
204        Ok(())
205    }
206
207    /// Optimize element name with caching and interning
208    fn optimize_element_name(
209        &mut self,
210        element: &Element,
211        namespaces: &IndexMap<String, String>,
212        depth: usize,
213    ) -> OptimizedString {
214        // Common element names are cached as static strings
215        let name_with_ns = if let Some(ns) = &element.namespace {
216            format!("{}:{}", ns, element.name)
217        } else if depth == 0 && !namespaces.is_empty() {
218            if let Some((prefix, _)) = namespaces.first() {
219                format!("{}:{}", prefix, element.name)
220            } else {
221                element.name.clone()
222            }
223        } else {
224            element.name.clone()
225        };
226
227        self.context.optimize_string(&name_with_ns)
228    }
229
230    /// Cache common indent patterns
231    fn get_optimized_indent(&self, depth: usize) -> String {
232        // Cache up to 10 levels (covers 99% of DDEX structures)
233        static CACHED_SPACE_INDENTS: once_cell::sync::Lazy<Vec<String>> =
234            once_cell::sync::Lazy::new(|| (0..=10).map(|d| " ".repeat(d * 2)).collect());
235
236        static CACHED_TAB_INDENTS: once_cell::sync::Lazy<Vec<String>> =
237            once_cell::sync::Lazy::new(|| (0..=10).map(|d| "\t".repeat(d)).collect());
238
239        let indent_width = self.config.indent_width;
240
241        match self.config.indent_char {
242            IndentChar::Space => {
243                if depth <= 10 && indent_width == 2 {
244                    CACHED_SPACE_INDENTS[depth].clone()
245                } else {
246                    " ".repeat(depth * indent_width)
247                }
248            }
249            IndentChar::Tab => {
250                if depth <= 10 && indent_width == 1 {
251                    CACHED_TAB_INDENTS[depth].clone()
252                } else {
253                    "\t".repeat(depth * indent_width)
254                }
255            }
256        }
257    }
258
259    /// In-place text escaping to avoid allocations
260    fn escape_text_into(&self, text: &str, writer: &mut String) {
261        // Reserve space for worst-case escaping
262        writer.reserve(text.len() * 6); // Worst case: all chars become &entity;
263
264        for ch in text.chars() {
265            match ch {
266                '&' => writer.push_str("&amp;"),
267                '<' => writer.push_str("&lt;"),
268                '>' => writer.push_str("&gt;"),
269                _ => writer.push(ch),
270            }
271        }
272    }
273
274    /// In-place attribute escaping
275    fn escape_attribute_into(&self, text: &str, writer: &mut String) {
276        writer.reserve(text.len() * 6);
277
278        for ch in text.chars() {
279            match ch {
280                '&' => writer.push_str("&amp;"),
281                '<' => writer.push_str("&lt;"),
282                '>' => writer.push_str("&gt;"),
283                '"' => writer.push_str("&quot;"),
284                '\'' => writer.push_str("&apos;"),
285                _ => writer.push(ch),
286            }
287        }
288    }
289}
290
291/// Vectorized XML operations for batch processing
292pub mod vectorized {
293    use super::*;
294    use rayon::prelude::*;
295
296    /// Write multiple elements in parallel (for large collections)
297    pub fn write_elements_parallel<T>(
298        elements: &[T],
299        context: &mut BuildContext,
300        config: &DeterminismConfig,
301        converter: impl Fn(&T) -> Element + Send + Sync,
302    ) -> Result<Vec<String>, BuildError>
303    where
304        T: Send + Sync,
305    {
306        // Only use parallelization for large collections
307        if elements.len() < 10 {
308            return write_elements_sequential(elements, context, config, converter);
309        }
310
311        // Process in parallel chunks
312        let chunk_size = (elements.len() / num_cpus::get()).max(1);
313
314        elements
315            .par_chunks(chunk_size)
316            .map(|chunk| {
317                // Each thread needs its own context to avoid conflicts
318                let mut local_context = BuildContext::new();
319                let mut writer = OptimizedXmlWriter::new(config.clone(), &mut local_context);
320
321                let mut results = Vec::with_capacity(chunk.len());
322                for element in chunk {
323                    let converted = converter(element);
324                    let ast = AST {
325                        root: converted,
326                        namespaces: IndexMap::new(),
327                        schema_location: None,
328                    };
329                    results.push(writer.write(&ast)?);
330                }
331                Ok(results)
332            })
333            .collect::<Result<Vec<_>, BuildError>>()
334            .map(|chunks| chunks.into_iter().flatten().collect())
335    }
336
337    /// Sequential version for smaller collections
338    fn write_elements_sequential<T>(
339        elements: &[T],
340        context: &mut BuildContext,
341        config: &DeterminismConfig,
342        converter: impl Fn(&T) -> Element,
343    ) -> Result<Vec<String>, BuildError> {
344        let mut writer = OptimizedXmlWriter::new(config.clone(), context);
345        let mut results = Vec::with_capacity(elements.len());
346
347        for element in elements {
348            let converted = converter(element);
349            let ast = AST {
350                root: converted,
351                namespaces: IndexMap::new(),
352                schema_location: None,
353            };
354            results.push(writer.write(&ast)?);
355        }
356
357        Ok(results)
358    }
359}
360
#[cfg(test)]
mod tests {
    use super::*;
    use crate::optimized_strings::BuildContext;

    /// Build an element with the given name and children, no namespace and no
    /// attributes.
    fn plain_element(name: &str, children: Vec<Node>) -> Element {
        Element {
            name: name.to_string(),
            namespace: None,
            attributes: IndexMap::new(),
            children,
        }
    }

    /// Wrap `root` in an AST without namespaces or schema location.
    fn bare_ast(root: Element) -> AST {
        AST {
            root,
            namespaces: IndexMap::new(),
            schema_location: None,
        }
    }

    /// A single text-only element is written inline and requests one buffer.
    #[test]
    fn test_optimized_writer_performance() {
        let mut context = BuildContext::new();
        let mut writer = OptimizedXmlWriter::new(DeterminismConfig::default(), &mut context);

        let ast = bare_ast(plain_element(
            "TestElement",
            vec![Node::Text("Test content".to_string())],
        ));

        let xml = writer.write(&ast).unwrap();
        assert!(xml.contains("<TestElement>Test content</TestElement>"));

        // The context records how many buffers were handed out.
        assert_eq!(context.stats.buffers_requested, 1);
    }

    /// A document with one sound recording is estimated at more than half the
    /// single-track size.
    #[test]
    fn test_size_estimation() {
        let mut context = BuildContext::new();
        let writer = OptimizedXmlWriter::new(DeterminismConfig::default(), &mut context);

        let recording = plain_element("SoundRecording", vec![]);
        let ast = bare_ast(plain_element(
            "NewReleaseMessage",
            vec![Node::Element(recording)],
        ));

        let estimated = writer.estimate_output_size(&ast);
        assert!(estimated > buffer_sizes::SINGLE_TRACK_XML / 2);
    }
}