1use crate::ast::{AST, Element, Node};
7use crate::determinism::{DeterminismConfig, IndentChar};
8use crate::error::BuildError;
9use crate::optimized_strings::{BuildContext, OptimizedString, buffer_sizes};
10use indexmap::IndexMap;
11use std::io::Write;
12use std::fmt::Write as FmtWrite;
13
14pub struct OptimizedXmlWriter<'a> {
16 config: DeterminismConfig,
17 context: &'a mut BuildContext,
18}
19
20impl<'a> OptimizedXmlWriter<'a> {
21 pub fn new(config: DeterminismConfig, context: &'a mut BuildContext) -> Self {
23 Self { config, context }
24 }
25
26 pub fn write(&mut self, ast: &AST) -> Result<String, BuildError> {
28 let estimated_size = self.estimate_output_size(ast);
30
31 let mut buffer = self.context.get_xml_buffer(estimated_size);
33
34 buffer.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
36
37 self.write_element_optimized(&mut buffer, &ast.root, &ast.namespaces,
39 ast.schema_location.as_deref(), 0)?;
40
41 let result = buffer.clone();
43 self.context.return_xml_buffer(buffer);
44
45 Ok(result)
46 }
47
48 fn estimate_output_size(&self, ast: &AST) -> usize {
50 let element_count = self.count_elements(&ast.root);
51
52 let track_count = self.estimate_track_count(&ast.root);
54 buffer_sizes::estimated_xml_size(track_count)
55 }
56
57 fn count_elements(&self, element: &Element) -> usize {
59 1 + element.children.iter()
60 .map(|child| match child {
61 Node::Element(elem) => self.count_elements(elem),
62 _ => 0,
63 })
64 .sum::<usize>()
65 }
66
67 fn estimate_track_count(&self, element: &Element) -> usize {
69 self.count_sound_recordings(element)
71 }
72
73 fn count_sound_recordings(&self, element: &Element) -> usize {
75 let mut count = 0;
76
77 if element.name == "SoundRecording" {
78 count += 1;
79 }
80
81 for child in &element.children {
82 if let Node::Element(child_elem) = child {
83 count += self.count_sound_recordings(child_elem);
84 }
85 }
86
87 count.max(1) }
89
90 fn write_element_optimized(
92 &mut self,
93 writer: &mut String,
94 element: &Element,
95 namespaces: &IndexMap<String, String>,
96 schema_location: Option<&str>,
97 depth: usize,
98 ) -> Result<(), BuildError> {
99 let indent = self.get_optimized_indent(depth);
101
102 writer.reserve(128); writer.push_str(&indent);
105 writer.push('<');
106
107 let element_name = self.optimize_element_name(element, namespaces, depth);
109 writer.push_str(element_name.as_str());
110
111 if depth == 0 {
113 for (prefix, uri) in namespaces {
114 writer.push_str(" xmlns:");
115 writer.push_str(prefix);
116 writer.push_str("=\"");
117 writer.push_str(uri);
118 writer.push('"');
119 }
120
121 if let Some(location) = schema_location {
122 writer.push_str(" xsi:schemaLocation=\"");
123 writer.push_str(location);
124 writer.push('"');
125 }
126 }
127
128 for (key, value) in &element.attributes {
130 writer.push(' ');
131 writer.push_str(key);
132 writer.push_str("=\"");
133 self.escape_attribute_into(value, writer);
135 writer.push('"');
136 }
137
138 if element.children.is_empty() {
140 writer.push_str("/>\n");
141 } else {
142 let only_text = element.children.len() == 1 &&
144 matches!(&element.children[0], Node::Text(_));
145
146 if only_text {
147 writer.push('>');
149 if let Node::Text(text) = &element.children[0] {
150 self.escape_text_into(text, writer);
151 }
152 writer.push_str("</");
153 writer.push_str(element_name.as_str());
154 writer.push_str(">\n");
155 } else {
156 writer.push_str(">\n");
158
159 for child in &element.children {
161 match child {
162 Node::Element(child_elem) => {
163 self.write_element_optimized(writer, child_elem, namespaces, None, depth + 1)?;
164 }
165 Node::Text(text) => {
166 writer.push_str(&self.get_optimized_indent(depth + 1));
167 self.escape_text_into(text, writer);
168 writer.push('\n');
169 }
170 Node::Comment(comment) => {
171 writer.push_str(&self.get_optimized_indent(depth + 1));
172 let comment_xml = comment.to_xml();
173 writer.push_str(&comment_xml);
174 writer.push_str("\n");
175 }
176 Node::SimpleComment(comment) => {
177 writer.push_str(&self.get_optimized_indent(depth + 1));
178 writer.push_str("<!-- ");
179 writer.push_str(comment);
180 writer.push_str(" -->\n");
181 }
182 }
183 }
184
185 writer.push_str(&indent);
187 writer.push_str("</");
188 writer.push_str(element_name.as_str());
189 writer.push_str(">\n");
190 }
191 }
192
193 Ok(())
194 }
195
196 fn optimize_element_name(
198 &mut self,
199 element: &Element,
200 namespaces: &IndexMap<String, String>,
201 depth: usize,
202 ) -> OptimizedString {
203 let name_with_ns = if let Some(ns) = &element.namespace {
205 format!("{}:{}", ns, element.name)
206 } else if depth == 0 && !namespaces.is_empty() {
207 if let Some((prefix, _)) = namespaces.first() {
208 format!("{}:{}", prefix, element.name)
209 } else {
210 element.name.clone()
211 }
212 } else {
213 element.name.clone()
214 };
215
216 self.context.optimize_string(&name_with_ns)
217 }
218
219 fn get_optimized_indent(&self, depth: usize) -> String {
221 static CACHED_SPACE_INDENTS: once_cell::sync::Lazy<Vec<String>> =
223 once_cell::sync::Lazy::new(|| {
224 (0..=10).map(|d| " ".repeat(d * 2)).collect()
225 });
226
227 static CACHED_TAB_INDENTS: once_cell::sync::Lazy<Vec<String>> =
228 once_cell::sync::Lazy::new(|| {
229 (0..=10).map(|d| "\t".repeat(d)).collect()
230 });
231
232 let indent_width = self.config.indent_width;
233
234 match self.config.indent_char {
235 IndentChar::Space => {
236 if depth <= 10 && indent_width == 2 {
237 CACHED_SPACE_INDENTS[depth].clone()
238 } else {
239 " ".repeat(depth * indent_width)
240 }
241 }
242 IndentChar::Tab => {
243 if depth <= 10 && indent_width == 1 {
244 CACHED_TAB_INDENTS[depth].clone()
245 } else {
246 "\t".repeat(depth * indent_width)
247 }
248 }
249 }
250 }
251
252 fn escape_text_into(&self, text: &str, writer: &mut String) {
254 writer.reserve(text.len() * 6); for ch in text.chars() {
258 match ch {
259 '&' => writer.push_str("&"),
260 '<' => writer.push_str("<"),
261 '>' => writer.push_str(">"),
262 _ => writer.push(ch),
263 }
264 }
265 }
266
267 fn escape_attribute_into(&self, text: &str, writer: &mut String) {
269 writer.reserve(text.len() * 6);
270
271 for ch in text.chars() {
272 match ch {
273 '&' => writer.push_str("&"),
274 '<' => writer.push_str("<"),
275 '>' => writer.push_str(">"),
276 '"' => writer.push_str("""),
277 '\'' => writer.push_str("'"),
278 _ => writer.push(ch),
279 }
280 }
281 }
282}
283
284pub mod vectorized {
286 use super::*;
287 use rayon::prelude::*;
288
289 pub fn write_elements_parallel<T>(
291 elements: &[T],
292 context: &mut BuildContext,
293 config: &DeterminismConfig,
294 converter: impl Fn(&T) -> Element + Send + Sync,
295 ) -> Result<Vec<String>, BuildError>
296 where
297 T: Send + Sync,
298 {
299 if elements.len() < 10 {
301 return write_elements_sequential(elements, context, config, converter);
302 }
303
304 let chunk_size = (elements.len() / num_cpus::get()).max(1);
306
307 elements
308 .par_chunks(chunk_size)
309 .map(|chunk| {
310 let mut local_context = BuildContext::new();
312 let mut writer = OptimizedXmlWriter::new(config.clone(), &mut local_context);
313
314 let mut results = Vec::with_capacity(chunk.len());
315 for element in chunk {
316 let converted = converter(element);
317 let ast = AST {
318 root: converted,
319 namespaces: IndexMap::new(),
320 schema_location: None,
321 };
322 results.push(writer.write(&ast)?);
323 }
324 Ok(results)
325 })
326 .collect::<Result<Vec<_>, BuildError>>()
327 .map(|chunks| chunks.into_iter().flatten().collect())
328 }
329
330 fn write_elements_sequential<T>(
332 elements: &[T],
333 context: &mut BuildContext,
334 config: &DeterminismConfig,
335 converter: impl Fn(&T) -> Element,
336 ) -> Result<Vec<String>, BuildError> {
337 let mut writer = OptimizedXmlWriter::new(config.clone(), context);
338 let mut results = Vec::with_capacity(elements.len());
339
340 for element in elements {
341 let converted = converter(element);
342 let ast = AST {
343 root: converted,
344 namespaces: IndexMap::new(),
345 schema_location: None,
346 };
347 results.push(writer.write(&ast)?);
348 }
349
350 Ok(results)
351 }
352}
353
354#[cfg(test)]
355mod tests {
356 use super::*;
357 use crate::optimized_strings::BuildContext;
358
/// Writes an element with a single text child and checks that it is rendered
/// inline and that exactly one buffer was requested from the context.
#[test]
fn test_optimized_writer_performance() {
    let mut context = BuildContext::new();
    let mut writer = OptimizedXmlWriter::new(DeterminismConfig::default(), &mut context);

    let ast = AST {
        root: Element {
            name: String::from("TestElement"),
            namespace: None,
            attributes: IndexMap::new(),
            children: vec![Node::Text(String::from("Test content"))],
        },
        namespaces: IndexMap::new(),
        schema_location: None,
    };

    let xml = writer.write(&ast).unwrap();
    assert!(xml.contains("<TestElement>Test content</TestElement>"));

    assert_eq!(context.stats.buffers_requested, 1);
}
385
/// Checks that a message containing one `SoundRecording` is estimated at more
/// than half of `buffer_sizes::SINGLE_TRACK_XML`.
#[test]
fn test_size_estimation() {
    let mut context = BuildContext::new();
    let writer = OptimizedXmlWriter::new(DeterminismConfig::default(), &mut context);

    let recording = Element {
        name: String::from("SoundRecording"),
        namespace: None,
        attributes: IndexMap::new(),
        children: Vec::new(),
    };

    let ast = AST {
        root: Element {
            name: String::from("NewReleaseMessage"),
            namespace: None,
            attributes: IndexMap::new(),
            children: vec![Node::Element(recording)],
        },
        namespaces: IndexMap::new(),
        schema_location: None,
    };

    let estimated = writer.estimate_output_size(&ast);
    assert!(estimated > buffer_sizes::SINGLE_TRACK_XML / 2);
}
416}