Skip to main content

rust_yaml/
composer_optimized.rs

1//! Optimized YAML composer that reduces allocations and cloning
2//!
3//! This module provides an optimized composer implementation that
4//! minimizes memory allocations and unnecessary cloning operations.
5
6use crate::{
7    BasicParser, Error, Limits, Parser, Position, ResourceTracker, Result,
8    parser::{EventType, ScalarStyle},
9    zero_copy_value::OptimizedValue,
10};
11use indexmap::IndexMap;
12use std::collections::HashMap;
13use std::rc::Rc;
14
15/// Calculate the maximum nesting depth of an optimized value structure
16fn calculate_optimized_structure_depth(value: &OptimizedValue) -> usize {
17    match value {
18        OptimizedValue::Sequence(seq) => {
19            if seq.is_empty() {
20                1
21            } else {
22                1 + seq
23                    .iter()
24                    .map(calculate_optimized_structure_depth)
25                    .max()
26                    .unwrap_or(0)
27            }
28        }
29        OptimizedValue::Mapping(map) => {
30            if map.is_empty() {
31                1
32            } else {
33                1 + map
34                    .values()
35                    .map(calculate_optimized_structure_depth)
36                    .max()
37                    .unwrap_or(0)
38            }
39        }
40        _ => 1, // Scalars have depth 1
41    }
42}
43
44/// Trait for optimized composers
45pub trait OptimizedComposer {
46    /// Check if there are more documents available
47    fn check_document(&self) -> bool;
48
49    /// Compose the next document with minimal allocations
50    fn compose_document(&mut self) -> Result<Option<OptimizedValue>>;
51
52    /// Get the current position in the stream
53    fn position(&self) -> Position;
54
55    /// Reset the composer state
56    fn reset(&mut self);
57}
58
59/// An optimized composer that reduces allocations
60pub struct ReducedAllocComposer {
61    parser: BasicParser,
62    position: Position,
63    /// Store anchors using Rc for cheap cloning
64    anchors: HashMap<String, Rc<OptimizedValue>>,
65    limits: Limits,
66    resource_tracker: ResourceTracker,
67    alias_expansion_stack: Vec<String>,
68    current_depth: usize,
69    /// Active YAML spec version for the current document.
70    yaml_version: crate::version::YamlVersion,
71}
72
73impl ReducedAllocComposer {
74    /// Create a new optimized composer
75    pub fn new(input: String) -> Self {
76        Self::with_limits(input, Limits::default())
77    }
78
79    /// Create a new optimized composer with custom limits
80    pub fn with_limits(input: String, limits: Limits) -> Self {
81        Self {
82            parser: BasicParser::with_limits(input, limits.clone()),
83            position: Position::new(),
84            anchors: HashMap::new(),
85            limits,
86            resource_tracker: ResourceTracker::new(),
87            alias_expansion_stack: Vec::new(),
88            current_depth: 0,
89            yaml_version: crate::version::YamlVersion::default(),
90        }
91    }
92
93    /// Compose a node from events with reduced allocations
94    fn compose_node(&mut self) -> Result<Option<OptimizedValue>> {
95        if !self.parser.check_event() {
96            return Ok(None);
97        }
98
99        let Some(event) = self.parser.get_event()? else {
100            return Ok(None);
101        };
102
103        self.position = event.position;
104
105        match event.event_type {
106            EventType::StreamStart | EventType::StreamEnd => self.compose_node(),
107
108            EventType::DocumentStart { .. } => self.compose_node(),
109
110            EventType::DocumentEnd { .. } => Ok(None),
111
112            EventType::Scalar {
113                value,
114                anchor,
115                style,
116                ..
117            } => {
118                let scalar_value = self.compose_scalar_optimized(value, style)?;
119
120                // Store anchor if present - use Rc for cheap cloning
121                if let Some(anchor_name) = anchor {
122                    self.resource_tracker.add_anchor(&self.limits)?;
123                    self.anchors
124                        .insert(anchor_name, Rc::new(scalar_value.clone()));
125                }
126
127                Ok(Some(scalar_value))
128            }
129
130            EventType::SequenceStart { anchor, .. } => {
131                let sequence = self.compose_sequence()?;
132
133                // Store anchor if present
134                if let Some(anchor_name) = anchor {
135                    if let Some(ref seq) = sequence {
136                        self.resource_tracker.add_anchor(&self.limits)?;
137                        self.anchors.insert(anchor_name, Rc::new(seq.clone()));
138                    }
139                }
140
141                Ok(sequence)
142            }
143
144            EventType::MappingStart { anchor, .. } => {
145                let mapping = self.compose_mapping()?;
146
147                // Store anchor if present
148                if let Some(anchor_name) = anchor {
149                    if let Some(ref map) = mapping {
150                        self.resource_tracker.add_anchor(&self.limits)?;
151                        self.anchors.insert(anchor_name, Rc::new(map.clone()));
152                    }
153                }
154
155                Ok(mapping)
156            }
157
158            EventType::SequenceEnd | EventType::MappingEnd => Ok(None),
159
160            EventType::Alias { anchor } => {
161                // Check for cyclic references
162                if self.alias_expansion_stack.contains(&anchor) {
163                    return Err(Error::construction(
164                        event.position,
165                        format!("Cyclic alias reference detected: '{}'", anchor),
166                    ));
167                }
168
169                // Check alias expansion depth limit BEFORE pushing
170                if self.alias_expansion_stack.len() >= self.limits.max_alias_depth {
171                    return Err(Error::construction(
172                        event.position,
173                        format!(
174                            "Maximum alias expansion depth {} exceeded",
175                            self.limits.max_alias_depth
176                        ),
177                    ));
178                }
179
180                // Track alias expansion
181                self.resource_tracker.enter_alias(&self.limits)?;
182                self.alias_expansion_stack.push(anchor.clone());
183
184                // Look up the anchor - use Rc clone for efficiency
185                let result = match self.anchors.get(&anchor) {
186                    Some(value) => {
187                        // Check if the resolved value's structure depth would exceed alias depth limit
188                        let structure_depth = calculate_optimized_structure_depth(value);
189                        if structure_depth > self.limits.max_alias_depth {
190                            return Err(Error::construction(
191                                event.position,
192                                format!(
193                                    "Alias '{}' creates structure with depth {} exceeding max_alias_depth {}",
194                                    anchor, structure_depth, self.limits.max_alias_depth
195                                ),
196                            ));
197                        }
198
199                        // Rc clone is very cheap - just increments reference count
200                        let cloned = (**value).clone();
201                        self.resource_tracker
202                            .add_complexity(&self.limits, calculate_complexity(&cloned)?)?;
203                        Ok(Some(cloned))
204                    }
205                    None => Err(Error::construction(
206                        event.position,
207                        format!("Unknown anchor '{}'", anchor),
208                    )),
209                };
210
211                // Clean up tracking
212                self.alias_expansion_stack.pop();
213                self.resource_tracker.exit_alias();
214
215                result
216            }
217        }
218    }
219
220    /// Compose a scalar value with optimization
221    fn compose_scalar_optimized(
222        &self,
223        value: String,
224        style: ScalarStyle,
225    ) -> Result<OptimizedValue> {
226        if matches!(style, ScalarStyle::SingleQuoted | ScalarStyle::DoubleQuoted) {
227            return Ok(OptimizedValue::string(value));
228        }
229
230        Ok(
231            match crate::resolver::resolve_plain_scalar(&value, self.yaml_version) {
232                crate::resolver::PlainScalarType::Null => OptimizedValue::Null,
233                crate::resolver::PlainScalarType::Bool(b) => OptimizedValue::Bool(b),
234                crate::resolver::PlainScalarType::Int(i) => OptimizedValue::Int(i),
235                crate::resolver::PlainScalarType::Float(f) => OptimizedValue::Float(f),
236                crate::resolver::PlainScalarType::Str => OptimizedValue::string(value),
237                crate::resolver::PlainScalarType::Value => {
238                    return Err(crate::resolver::value_tag_error(self.position));
239                }
240            },
241        )
242    }
243
244    /// Compose a sequence with reduced allocations
245    fn compose_sequence(&mut self) -> Result<Option<OptimizedValue>> {
246        self.current_depth += 1;
247        self.resource_tracker
248            .check_depth(&self.limits, self.current_depth)?;
249
250        let mut sequence = Vec::new();
251
252        while self.parser.check_event() {
253            if let Ok(Some(event)) = self.parser.peek_event() {
254                if matches!(event.event_type, EventType::SequenceEnd) {
255                    self.parser.get_event()?;
256                    break;
257                } else if matches!(
258                    event.event_type,
259                    EventType::DocumentEnd { .. }
260                        | EventType::DocumentStart { .. }
261                        | EventType::StreamEnd
262                ) {
263                    break;
264                }
265            }
266
267            if let Some(node) = self.compose_node()? {
268                self.resource_tracker.add_collection_item(&self.limits)?;
269                self.resource_tracker.add_complexity(&self.limits, 1)?;
270                sequence.push(node);
271            } else {
272                break;
273            }
274        }
275
276        self.current_depth -= 1;
277        Ok(Some(OptimizedValue::sequence_with(sequence)))
278    }
279
280    /// Compose a mapping with reduced allocations
281    fn compose_mapping(&mut self) -> Result<Option<OptimizedValue>> {
282        self.current_depth += 1;
283        self.resource_tracker
284            .check_depth(&self.limits, self.current_depth)?;
285
286        let mut mapping = IndexMap::new();
287
288        while self.parser.check_event() {
289            if let Ok(Some(event)) = self.parser.peek_event() {
290                if matches!(event.event_type, EventType::MappingEnd) {
291                    self.parser.get_event()?;
292                    break;
293                } else if matches!(
294                    event.event_type,
295                    EventType::DocumentEnd { .. }
296                        | EventType::DocumentStart { .. }
297                        | EventType::StreamEnd
298                ) {
299                    break;
300                }
301            }
302
303            let Some(key) = self.compose_node()? else {
304                break;
305            };
306
307            let value = self.compose_node()?.unwrap_or(OptimizedValue::Null);
308
309            // Check for merge key
310            if let OptimizedValue::String(key_str) = &key {
311                if key_str.as_str() == "<<" {
312                    self.process_merge_key(&mut mapping, &value)?;
313                    continue;
314                }
315            }
316
317            self.resource_tracker.add_collection_item(&self.limits)?;
318            self.resource_tracker.add_complexity(&self.limits, 2)?;
319
320            mapping.insert(key, value);
321        }
322
323        self.current_depth -= 1;
324        Ok(Some(OptimizedValue::mapping_with(
325            mapping.into_iter().collect(),
326        )))
327    }
328
329    /// Process a merge key by merging values into the current mapping
330    fn process_merge_key(
331        &self,
332        mapping: &mut IndexMap<OptimizedValue, OptimizedValue>,
333        merge_value: &OptimizedValue,
334    ) -> Result<()> {
335        match merge_value {
336            // Single mapping to merge
337            OptimizedValue::Mapping(source_map) => {
338                for (key, value) in source_map.iter() {
339                    // Only insert if key doesn't already exist
340                    mapping.entry(key.clone()).or_insert_with(|| value.clone());
341                }
342            }
343
344            // Sequence of mappings to merge
345            OptimizedValue::Sequence(sources) => {
346                for source in sources.iter() {
347                    if let OptimizedValue::Mapping(source_map) = source {
348                        for (key, value) in source_map.iter() {
349                            mapping.entry(key.clone()).or_insert_with(|| value.clone());
350                        }
351                    } else {
352                        return Err(Error::construction(
353                            self.position,
354                            "Merge key sequence can only contain mappings",
355                        ));
356                    }
357                }
358            }
359
360            _ => {
361                return Err(Error::construction(
362                    self.position,
363                    "Merge key value must be a mapping or sequence of mappings",
364                ));
365            }
366        }
367
368        Ok(())
369    }
370}
371
372impl OptimizedComposer for ReducedAllocComposer {
373    fn check_document(&self) -> bool {
374        if let Ok(Some(event)) = self.parser.peek_event() {
375            !matches!(event.event_type, EventType::StreamEnd)
376        } else {
377            false
378        }
379    }
380
381    fn compose_document(&mut self) -> Result<Option<OptimizedValue>> {
382        if let Some(error) = self.parser.take_scanning_error() {
383            return Err(error);
384        }
385
386        // Consume document start events, capturing the YAML version directive.
387        while let Ok(Some(event)) = self.parser.peek_event() {
388            if let EventType::DocumentStart { version, .. } = &event.event_type {
389                self.yaml_version = version
390                    .map(|(maj, min)| crate::version::YamlVersion::from_directive(maj, min))
391                    .unwrap_or_default();
392                self.parser.get_event()?;
393            } else {
394                break;
395            }
396        }
397
398        let document = self.compose_node()?;
399
400        // Skip any document end event
401        while let Ok(Some(event)) = self.parser.peek_event() {
402            if matches!(event.event_type, EventType::DocumentEnd { .. }) {
403                self.parser.get_event()?;
404            } else {
405                break;
406            }
407        }
408
409        Ok(document)
410    }
411
412    fn position(&self) -> Position {
413        self.position
414    }
415
416    fn reset(&mut self) {
417        self.position = Position::new();
418        self.anchors.clear();
419        self.resource_tracker.reset();
420        self.alias_expansion_stack.clear();
421        self.current_depth = 0;
422    }
423}
424
425/// Calculate complexity score for a value
426fn calculate_complexity(value: &OptimizedValue) -> Result<usize> {
427    let mut complexity = 1usize;
428
429    match value {
430        OptimizedValue::Sequence(seq) => {
431            complexity = complexity.saturating_add(seq.len());
432            for item in seq.iter() {
433                complexity = complexity.saturating_add(calculate_complexity(item)?);
434            }
435        }
436        OptimizedValue::Mapping(map) => {
437            complexity = complexity.saturating_add(map.len().saturating_mul(2));
438            for (key, val) in map.iter() {
439                complexity = complexity.saturating_add(calculate_complexity(key)?);
440                complexity = complexity.saturating_add(calculate_complexity(val)?);
441            }
442        }
443        _ => {} // Scalars have complexity 1
444    }
445
446    Ok(complexity)
447}
448
449#[cfg(test)]
450mod tests {
451    use super::*;
452
453    #[test]
454    fn test_optimized_scalar() {
455        let mut composer = ReducedAllocComposer::new("42".to_string());
456        let result = composer.compose_document().unwrap().unwrap();
457        assert_eq!(result, OptimizedValue::Int(42));
458    }
459
460    #[test]
461    fn test_optimized_sequence() {
462        let mut composer = ReducedAllocComposer::new("[1, 2, 3]".to_string());
463        let result = composer.compose_document().unwrap().unwrap();
464
465        if let OptimizedValue::Sequence(seq) = result {
466            assert_eq!(seq.len(), 3);
467        } else {
468            panic!("Expected sequence");
469        }
470    }
471
472    #[test]
473    fn test_anchor_rc_sharing() {
474        let yaml = r#"
475base: &base
476  value: 42
477ref1: *base
478ref2: *base
479"#;
480        let mut composer = ReducedAllocComposer::new(yaml.to_string());
481        let _result = composer.compose_document().unwrap().unwrap();
482
483        // The anchors should use Rc, so cloning should be cheap
484        assert!(composer.anchors.len() > 0);
485    }
486}