1use crate::ast::{Element, Node, AST};
7use crate::determinism::{DeterminismConfig, IndentChar};
8use crate::error::BuildError;
9use crate::optimized_strings::{buffer_sizes, BuildContext, OptimizedString};
10use indexmap::IndexMap;
11
/// XML serializer that renders an [`AST`] into a `String`.
///
/// Output buffers are requested from, and handed back to, the borrowed
/// [`BuildContext`]; element names are also passed through the context's
/// string optimization before being written.
pub struct OptimizedXmlWriter<'a> {
    /// Formatting settings. The writer reads `indent_char` and `indent_width`
    /// from it to produce indentation.
    config: DeterminismConfig,
    /// Build context borrowed for the writer's lifetime; supplies output
    /// buffers and optimized element-name strings.
    context: &'a mut BuildContext,
}
17
18impl<'a> OptimizedXmlWriter<'a> {
19 pub fn new(config: DeterminismConfig, context: &'a mut BuildContext) -> Self {
21 Self { config, context }
22 }
23
24 pub fn write(&mut self, ast: &AST) -> Result<String, BuildError> {
26 let estimated_size = self.estimate_output_size(ast);
28
29 let mut buffer = self.context.get_xml_buffer(estimated_size);
31
32 buffer.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
34
35 self.write_element_optimized(
37 &mut buffer,
38 &ast.root,
39 &ast.namespaces,
40 ast.schema_location.as_deref(),
41 0,
42 )?;
43
44 let result = buffer.clone();
46 self.context.return_xml_buffer(buffer);
47
48 Ok(result)
49 }
50
51 fn estimate_output_size(&self, ast: &AST) -> usize {
53 let _element_count = self.count_elements(&ast.root);
54
55 let track_count = self.estimate_track_count(&ast.root);
57 buffer_sizes::estimated_xml_size(track_count)
58 }
59
60 fn count_elements(&self, element: &Element) -> usize {
62 1 + element
63 .children
64 .iter()
65 .map(|child| match child {
66 Node::Element(elem) => self.count_elements(elem),
67 _ => 0,
68 })
69 .sum::<usize>()
70 }
71
72 fn estimate_track_count(&self, element: &Element) -> usize {
74 self.count_sound_recordings(element)
76 }
77
78 fn count_sound_recordings(&self, element: &Element) -> usize {
80 let mut count = 0;
81
82 if element.name == "SoundRecording" {
83 count += 1;
84 }
85
86 for child in &element.children {
87 if let Node::Element(child_elem) = child {
88 count += self.count_sound_recordings(child_elem);
89 }
90 }
91
92 count.max(1) }
94
95 fn write_element_optimized(
97 &mut self,
98 writer: &mut String,
99 element: &Element,
100 namespaces: &IndexMap<String, String>,
101 schema_location: Option<&str>,
102 depth: usize,
103 ) -> Result<(), BuildError> {
104 let indent = self.get_optimized_indent(depth);
106
107 writer.reserve(128); writer.push_str(&indent);
110 writer.push('<');
111
112 let element_name = self.optimize_element_name(element, namespaces, depth);
114 writer.push_str(element_name.as_str());
115
116 if depth == 0 {
118 for (prefix, uri) in namespaces {
119 writer.push_str(" xmlns:");
120 writer.push_str(prefix);
121 writer.push_str("=\"");
122 writer.push_str(uri);
123 writer.push('"');
124 }
125
126 if let Some(location) = schema_location {
127 writer.push_str(" xsi:schemaLocation=\"");
128 writer.push_str(location);
129 writer.push('"');
130 }
131 }
132
133 for (key, value) in &element.attributes {
135 writer.push(' ');
136 writer.push_str(key);
137 writer.push_str("=\"");
138 self.escape_attribute_into(value, writer);
140 writer.push('"');
141 }
142
143 if element.children.is_empty() {
145 writer.push_str("/>\n");
146 } else {
147 let only_text =
149 element.children.len() == 1 && matches!(&element.children[0], Node::Text(_));
150
151 if only_text {
152 writer.push('>');
154 if let Node::Text(text) = &element.children[0] {
155 self.escape_text_into(text, writer);
156 }
157 writer.push_str("</");
158 writer.push_str(element_name.as_str());
159 writer.push_str(">\n");
160 } else {
161 writer.push_str(">\n");
163
164 for child in &element.children {
166 match child {
167 Node::Element(child_elem) => {
168 self.write_element_optimized(
169 writer,
170 child_elem,
171 namespaces,
172 None,
173 depth + 1,
174 )?;
175 }
176 Node::Text(text) => {
177 writer.push_str(&self.get_optimized_indent(depth + 1));
178 self.escape_text_into(text, writer);
179 writer.push('\n');
180 }
181 Node::Comment(comment) => {
182 writer.push_str(&self.get_optimized_indent(depth + 1));
183 let comment_xml = comment.to_xml();
184 writer.push_str(&comment_xml);
185 writer.push_str("\n");
186 }
187 Node::SimpleComment(comment) => {
188 writer.push_str(&self.get_optimized_indent(depth + 1));
189 writer.push_str("<!-- ");
190 writer.push_str(comment);
191 writer.push_str(" -->\n");
192 }
193 }
194 }
195
196 writer.push_str(&indent);
198 writer.push_str("</");
199 writer.push_str(element_name.as_str());
200 writer.push_str(">\n");
201 }
202 }
203
204 Ok(())
205 }
206
207 fn optimize_element_name(
209 &mut self,
210 element: &Element,
211 namespaces: &IndexMap<String, String>,
212 depth: usize,
213 ) -> OptimizedString {
214 let name_with_ns = if let Some(ns) = &element.namespace {
216 format!("{}:{}", ns, element.name)
217 } else if depth == 0 && !namespaces.is_empty() {
218 if let Some((prefix, _)) = namespaces.first() {
219 format!("{}:{}", prefix, element.name)
220 } else {
221 element.name.clone()
222 }
223 } else {
224 element.name.clone()
225 };
226
227 self.context.optimize_string(&name_with_ns)
228 }
229
230 fn get_optimized_indent(&self, depth: usize) -> String {
232 static CACHED_SPACE_INDENTS: once_cell::sync::Lazy<Vec<String>> =
234 once_cell::sync::Lazy::new(|| (0..=10).map(|d| " ".repeat(d * 2)).collect());
235
236 static CACHED_TAB_INDENTS: once_cell::sync::Lazy<Vec<String>> =
237 once_cell::sync::Lazy::new(|| (0..=10).map(|d| "\t".repeat(d)).collect());
238
239 let indent_width = self.config.indent_width;
240
241 match self.config.indent_char {
242 IndentChar::Space => {
243 if depth <= 10 && indent_width == 2 {
244 CACHED_SPACE_INDENTS[depth].clone()
245 } else {
246 " ".repeat(depth * indent_width)
247 }
248 }
249 IndentChar::Tab => {
250 if depth <= 10 && indent_width == 1 {
251 CACHED_TAB_INDENTS[depth].clone()
252 } else {
253 "\t".repeat(depth * indent_width)
254 }
255 }
256 }
257 }
258
259 fn escape_text_into(&self, text: &str, writer: &mut String) {
261 writer.reserve(text.len() * 6); for ch in text.chars() {
265 match ch {
266 '&' => writer.push_str("&"),
267 '<' => writer.push_str("<"),
268 '>' => writer.push_str(">"),
269 _ => writer.push(ch),
270 }
271 }
272 }
273
274 fn escape_attribute_into(&self, text: &str, writer: &mut String) {
276 writer.reserve(text.len() * 6);
277
278 for ch in text.chars() {
279 match ch {
280 '&' => writer.push_str("&"),
281 '<' => writer.push_str("<"),
282 '>' => writer.push_str(">"),
283 '"' => writer.push_str("""),
284 '\'' => writer.push_str("'"),
285 _ => writer.push(ch),
286 }
287 }
288 }
289}
290
291pub mod vectorized {
293 use super::*;
294 use rayon::prelude::*;
295
296 pub fn write_elements_parallel<T>(
298 elements: &[T],
299 context: &mut BuildContext,
300 config: &DeterminismConfig,
301 converter: impl Fn(&T) -> Element + Send + Sync,
302 ) -> Result<Vec<String>, BuildError>
303 where
304 T: Send + Sync,
305 {
306 if elements.len() < 10 {
308 return write_elements_sequential(elements, context, config, converter);
309 }
310
311 let chunk_size = (elements.len() / num_cpus::get()).max(1);
313
314 elements
315 .par_chunks(chunk_size)
316 .map(|chunk| {
317 let mut local_context = BuildContext::new();
319 let mut writer = OptimizedXmlWriter::new(config.clone(), &mut local_context);
320
321 let mut results = Vec::with_capacity(chunk.len());
322 for element in chunk {
323 let converted = converter(element);
324 let ast = AST {
325 root: converted,
326 namespaces: IndexMap::new(),
327 schema_location: None,
328 };
329 results.push(writer.write(&ast)?);
330 }
331 Ok(results)
332 })
333 .collect::<Result<Vec<_>, BuildError>>()
334 .map(|chunks| chunks.into_iter().flatten().collect())
335 }
336
337 fn write_elements_sequential<T>(
339 elements: &[T],
340 context: &mut BuildContext,
341 config: &DeterminismConfig,
342 converter: impl Fn(&T) -> Element,
343 ) -> Result<Vec<String>, BuildError> {
344 let mut writer = OptimizedXmlWriter::new(config.clone(), context);
345 let mut results = Vec::with_capacity(elements.len());
346
347 for element in elements {
348 let converted = converter(element);
349 let ast = AST {
350 root: converted,
351 namespaces: IndexMap::new(),
352 schema_location: None,
353 };
354 results.push(writer.write(&ast)?);
355 }
356
357 Ok(results)
358 }
359}
360
#[cfg(test)]
mod tests {
    use super::*;
    use crate::optimized_strings::BuildContext;

    /// Builds an element with the given name and children, no namespace and
    /// no attributes.
    fn plain_element(name: &str, children: Vec<Node>) -> Element {
        Element {
            name: name.to_string(),
            namespace: None,
            attributes: IndexMap::new(),
            children,
        }
    }

    /// Wraps `root` in an AST without namespaces or schema location.
    fn bare_ast(root: Element) -> AST {
        AST {
            root,
            namespaces: IndexMap::new(),
            schema_location: None,
        }
    }

    /// A single text-only element is written inline, and exactly one buffer
    /// is requested from the context for the document.
    #[test]
    fn test_optimized_writer_performance() {
        let mut context = BuildContext::new();
        let mut writer = OptimizedXmlWriter::new(DeterminismConfig::default(), &mut context);

        let ast = bare_ast(plain_element(
            "TestElement",
            vec![Node::Text("Test content".to_string())],
        ));

        let xml = writer.write(&ast).unwrap();
        assert!(xml.contains("<TestElement>Test content</TestElement>"));

        assert_eq!(context.stats.buffers_requested, 1);
    }

    /// A message containing one `SoundRecording` yields an estimate above
    /// half of the single-track buffer size.
    #[test]
    fn test_size_estimation() {
        let mut context = BuildContext::new();
        let writer = OptimizedXmlWriter::new(DeterminismConfig::default(), &mut context);

        let recording = plain_element("SoundRecording", vec![]);
        let ast = bare_ast(plain_element(
            "NewReleaseMessage",
            vec![Node::Element(recording)],
        ));

        let estimated = writer.estimate_output_size(&ast);
        assert!(estimated > buffer_sizes::SINGLE_TRACK_XML / 2);
    }
}