ddex_builder/generator/
optimized_xml_writer.rs

//! High-performance XML writer optimized for the DDEX Builder.
//!
//! This writer uses string interning, buffer pooling, and vectorized (rayon-based
//! parallel batch) operations, targeting under 10 ms for a typical album.
5
6use crate::ast::{AST, Element, Node};
7use crate::determinism::{DeterminismConfig, IndentChar};
8use crate::error::BuildError;
9use crate::optimized_strings::{BuildContext, OptimizedString, buffer_sizes};
10use indexmap::IndexMap;
11use std::io::Write;
12use std::fmt::Write as FmtWrite;
13
14/// High-performance XML writer with optimizations
15pub struct OptimizedXmlWriter<'a> {
16    config: DeterminismConfig,
17    context: &'a mut BuildContext,
18}
19
20impl<'a> OptimizedXmlWriter<'a> {
21    /// Create a new optimized XML writer
22    pub fn new(config: DeterminismConfig, context: &'a mut BuildContext) -> Self {
23        Self { config, context }
24    }
25    
26    /// Write AST to XML string with performance optimizations
27    pub fn write(&mut self, ast: &AST) -> Result<String, BuildError> {
28        // Pre-calculate estimated size based on AST complexity
29        let estimated_size = self.estimate_output_size(ast);
30        
31        // Get pre-sized buffer from pool
32        let mut buffer = self.context.get_xml_buffer(estimated_size);
33        
34        // Write XML declaration (static strings for performance)
35        buffer.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
36        
37        // Write root element with namespaces
38        self.write_element_optimized(&mut buffer, &ast.root, &ast.namespaces, 
39                                   ast.schema_location.as_deref(), 0)?;
40        
41        // Return buffer to pool for reuse
42        let result = buffer.clone();
43        self.context.return_xml_buffer(buffer);
44        
45        Ok(result)
46    }
47    
48    /// Estimate output size to pre-allocate buffers efficiently
49    fn estimate_output_size(&self, ast: &AST) -> usize {
50        let element_count = self.count_elements(&ast.root);
51        
52        // Use our buffer size estimation
53        let track_count = self.estimate_track_count(&ast.root);
54        buffer_sizes::estimated_xml_size(track_count)
55    }
56    
57    /// Count total elements in AST for size estimation
58    fn count_elements(&self, element: &Element) -> usize {
59        1 + element.children.iter()
60            .map(|child| match child {
61                Node::Element(elem) => self.count_elements(elem),
62                _ => 0,
63            })
64            .sum::<usize>()
65    }
66    
67    /// Estimate track count for buffer sizing
68    fn estimate_track_count(&self, element: &Element) -> usize {
69        // Look for SoundRecording elements as proxy for track count
70        self.count_sound_recordings(element)
71    }
72    
73    /// Count SoundRecording elements
74    fn count_sound_recordings(&self, element: &Element) -> usize {
75        let mut count = 0;
76        
77        if element.name == "SoundRecording" {
78            count += 1;
79        }
80        
81        for child in &element.children {
82            if let Node::Element(child_elem) = child {
83                count += self.count_sound_recordings(child_elem);
84            }
85        }
86        
87        count.max(1) // At least 1 for sizing
88    }
89    
90    /// Optimized element writing with string interning and fast paths
91    fn write_element_optimized(
92        &mut self,
93        writer: &mut String,
94        element: &Element,
95        namespaces: &IndexMap<String, String>,
96        schema_location: Option<&str>,
97        depth: usize,
98    ) -> Result<(), BuildError> {
99        // Pre-calculate indent (cache common depths)
100        let indent = self.get_optimized_indent(depth);
101        
102        // Start tag with capacity hint
103        writer.reserve(128); // Common element size
104        writer.push_str(&indent);
105        writer.push('<');
106        
107        // Optimize element name with interning
108        let element_name = self.optimize_element_name(element, namespaces, depth);
109        writer.push_str(element_name.as_str());
110        
111        // Add namespace declarations on root element
112        if depth == 0 {
113            for (prefix, uri) in namespaces {
114                writer.push_str(" xmlns:");
115                writer.push_str(prefix);
116                writer.push_str("=\"");
117                writer.push_str(uri);
118                writer.push('"');
119            }
120            
121            if let Some(location) = schema_location {
122                writer.push_str(" xsi:schemaLocation=\"");
123                writer.push_str(location);
124                writer.push('"');
125            }
126        }
127        
128        // Add attributes (in deterministic order)
129        for (key, value) in &element.attributes {
130            writer.push(' ');
131            writer.push_str(key);
132            writer.push_str("=\"");
133            // Use optimized escaping
134            self.escape_attribute_into(value, writer);
135            writer.push('"');
136        }
137        
138        // Handle children with fast paths
139        if element.children.is_empty() {
140            writer.push_str("/>\n");
141        } else {
142            // Check for common patterns
143            let only_text = element.children.len() == 1 && 
144                matches!(&element.children[0], Node::Text(_));
145            
146            if only_text {
147                // Inline text content (most common case)
148                writer.push('>');
149                if let Node::Text(text) = &element.children[0] {
150                    self.escape_text_into(text, writer);
151                }
152                writer.push_str("</");
153                writer.push_str(element_name.as_str());
154                writer.push_str(">\n");
155            } else {
156                // Has child elements
157                writer.push_str(">\n");
158                
159                // Write children with batch operations when possible
160                for child in &element.children {
161                    match child {
162                        Node::Element(child_elem) => {
163                            self.write_element_optimized(writer, child_elem, namespaces, None, depth + 1)?;
164                        }
165                        Node::Text(text) => {
166                            writer.push_str(&self.get_optimized_indent(depth + 1));
167                            self.escape_text_into(text, writer);
168                            writer.push('\n');
169                        }
170                        Node::Comment(comment) => {
171                            writer.push_str(&self.get_optimized_indent(depth + 1));
172                            let comment_xml = comment.to_xml();
173                            writer.push_str(&comment_xml);
174                            writer.push_str("\n");
175                        }
176                        Node::SimpleComment(comment) => {
177                            writer.push_str(&self.get_optimized_indent(depth + 1));
178                            writer.push_str("<!-- ");
179                            writer.push_str(comment);
180                            writer.push_str(" -->\n");
181                        }
182                    }
183                }
184                
185                // Close tag
186                writer.push_str(&indent);
187                writer.push_str("</");
188                writer.push_str(element_name.as_str());
189                writer.push_str(">\n");
190            }
191        }
192        
193        Ok(())
194    }
195    
196    /// Optimize element name with caching and interning
197    fn optimize_element_name(
198        &mut self,
199        element: &Element,
200        namespaces: &IndexMap<String, String>,
201        depth: usize,
202    ) -> OptimizedString {
203        // Common element names are cached as static strings
204        let name_with_ns = if let Some(ns) = &element.namespace {
205            format!("{}:{}", ns, element.name)
206        } else if depth == 0 && !namespaces.is_empty() {
207            if let Some((prefix, _)) = namespaces.first() {
208                format!("{}:{}", prefix, element.name)
209            } else {
210                element.name.clone()
211            }
212        } else {
213            element.name.clone()
214        };
215        
216        self.context.optimize_string(&name_with_ns)
217    }
218    
219    /// Cache common indent patterns
220    fn get_optimized_indent(&self, depth: usize) -> String {
221        // Cache up to 10 levels (covers 99% of DDEX structures)
222        static CACHED_SPACE_INDENTS: once_cell::sync::Lazy<Vec<String>> = 
223            once_cell::sync::Lazy::new(|| {
224                (0..=10).map(|d| " ".repeat(d * 2)).collect()
225            });
226        
227        static CACHED_TAB_INDENTS: once_cell::sync::Lazy<Vec<String>> = 
228            once_cell::sync::Lazy::new(|| {
229                (0..=10).map(|d| "\t".repeat(d)).collect()
230            });
231        
232        let indent_width = self.config.indent_width;
233        
234        match self.config.indent_char {
235            IndentChar::Space => {
236                if depth <= 10 && indent_width == 2 {
237                    CACHED_SPACE_INDENTS[depth].clone()
238                } else {
239                    " ".repeat(depth * indent_width)
240                }
241            }
242            IndentChar::Tab => {
243                if depth <= 10 && indent_width == 1 {
244                    CACHED_TAB_INDENTS[depth].clone()
245                } else {
246                    "\t".repeat(depth * indent_width)
247                }
248            }
249        }
250    }
251    
252    /// In-place text escaping to avoid allocations
253    fn escape_text_into(&self, text: &str, writer: &mut String) {
254        // Reserve space for worst-case escaping
255        writer.reserve(text.len() * 6); // Worst case: all chars become &entity;
256        
257        for ch in text.chars() {
258            match ch {
259                '&' => writer.push_str("&amp;"),
260                '<' => writer.push_str("&lt;"),
261                '>' => writer.push_str("&gt;"),
262                _ => writer.push(ch),
263            }
264        }
265    }
266    
267    /// In-place attribute escaping
268    fn escape_attribute_into(&self, text: &str, writer: &mut String) {
269        writer.reserve(text.len() * 6);
270        
271        for ch in text.chars() {
272            match ch {
273                '&' => writer.push_str("&amp;"),
274                '<' => writer.push_str("&lt;"),
275                '>' => writer.push_str("&gt;"),
276                '"' => writer.push_str("&quot;"),
277                '\'' => writer.push_str("&apos;"),
278                _ => writer.push(ch),
279            }
280        }
281    }
282}
283
284/// Vectorized XML operations for batch processing
285pub mod vectorized {
286    use super::*;
287    use rayon::prelude::*;
288    
289    /// Write multiple elements in parallel (for large collections)
290    pub fn write_elements_parallel<T>(
291        elements: &[T],
292        context: &mut BuildContext,
293        config: &DeterminismConfig,
294        converter: impl Fn(&T) -> Element + Send + Sync,
295    ) -> Result<Vec<String>, BuildError> 
296    where
297        T: Send + Sync,
298    {
299        // Only use parallelization for large collections
300        if elements.len() < 10 {
301            return write_elements_sequential(elements, context, config, converter);
302        }
303        
304        // Process in parallel chunks
305        let chunk_size = (elements.len() / num_cpus::get()).max(1);
306        
307        elements
308            .par_chunks(chunk_size)
309            .map(|chunk| {
310                // Each thread needs its own context to avoid conflicts
311                let mut local_context = BuildContext::new();
312                let mut writer = OptimizedXmlWriter::new(config.clone(), &mut local_context);
313                
314                let mut results = Vec::with_capacity(chunk.len());
315                for element in chunk {
316                    let converted = converter(element);
317                    let ast = AST {
318                        root: converted,
319                        namespaces: IndexMap::new(),
320                        schema_location: None,
321                    };
322                    results.push(writer.write(&ast)?);
323                }
324                Ok(results)
325            })
326            .collect::<Result<Vec<_>, BuildError>>()
327            .map(|chunks| chunks.into_iter().flatten().collect())
328    }
329    
330    /// Sequential version for smaller collections
331    fn write_elements_sequential<T>(
332        elements: &[T],
333        context: &mut BuildContext,
334        config: &DeterminismConfig,
335        converter: impl Fn(&T) -> Element,
336    ) -> Result<Vec<String>, BuildError> {
337        let mut writer = OptimizedXmlWriter::new(config.clone(), context);
338        let mut results = Vec::with_capacity(elements.len());
339        
340        for element in elements {
341            let converted = converter(element);
342            let ast = AST {
343                root: converted,
344                namespaces: IndexMap::new(),
345                schema_location: None,
346            };
347            results.push(writer.write(&ast)?);
348        }
349        
350        Ok(results)
351    }
352}
353
#[cfg(test)]
mod tests {
    use super::*;
    use crate::optimized_strings::BuildContext;

    /// Build an element without namespace or attributes.
    fn plain_element(name: &str, children: Vec<Node>) -> Element {
        Element {
            name: name.to_string(),
            namespace: None,
            attributes: IndexMap::new(),
            children,
        }
    }

    /// Wrap `root` in a document with no namespaces and no schema location.
    fn bare_document(root: Element) -> AST {
        AST {
            root,
            namespaces: IndexMap::new(),
            schema_location: None,
        }
    }

    #[test]
    fn test_optimized_writer_performance() {
        let mut context = BuildContext::new();
        let mut writer = OptimizedXmlWriter::new(DeterminismConfig::default(), &mut context);

        // A single element whose only child is text is written inline.
        let ast = bare_document(plain_element(
            "TestElement",
            vec![Node::Text("Test content".to_string())],
        ));

        let xml = writer.write(&ast).unwrap();
        assert!(xml.contains("<TestElement>Test content</TestElement>"));

        // One write must have requested exactly one pooled buffer.
        assert_eq!(context.stats.buffers_requested, 1);
    }

    #[test]
    fn test_size_estimation() {
        let mut context = BuildContext::new();
        let writer = OptimizedXmlWriter::new(DeterminismConfig::default(), &mut context);

        // Root message containing a single sound recording.
        let recording = plain_element("SoundRecording", vec![]);
        let ast = bare_document(plain_element(
            "NewReleaseMessage",
            vec![Node::Element(recording)],
        ));

        let estimated = writer.estimate_output_size(&ast);
        assert!(estimated > buffer_sizes::SINGLE_TRACK_XML / 2);
    }
}