rust_yaml/
composer_optimized.rs

1//! Optimized YAML composer that reduces allocations and cloning
2//!
3//! This module provides an optimized composer implementation that
4//! minimizes memory allocations and unnecessary cloning operations.
5
6use crate::{
7    parser::{EventType, ScalarStyle},
8    zero_copy_value::OptimizedValue,
9    BasicParser, Error, Limits, Parser, Position, ResourceTracker, Result,
10};
11use indexmap::IndexMap;
12use std::collections::HashMap;
13use std::rc::Rc;
14
15/// Calculate the maximum nesting depth of an optimized value structure
16fn calculate_optimized_structure_depth(value: &OptimizedValue) -> usize {
17    match value {
18        OptimizedValue::Sequence(seq) => {
19            if seq.is_empty() {
20                1
21            } else {
22                1 + seq
23                    .iter()
24                    .map(calculate_optimized_structure_depth)
25                    .max()
26                    .unwrap_or(0)
27            }
28        }
29        OptimizedValue::Mapping(map) => {
30            if map.is_empty() {
31                1
32            } else {
33                1 + map
34                    .values()
35                    .map(calculate_optimized_structure_depth)
36                    .max()
37                    .unwrap_or(0)
38            }
39        }
40        _ => 1, // Scalars have depth 1
41    }
42}
43
44/// Trait for optimized composers
45pub trait OptimizedComposer {
46    /// Check if there are more documents available
47    fn check_document(&self) -> bool;
48
49    /// Compose the next document with minimal allocations
50    fn compose_document(&mut self) -> Result<Option<OptimizedValue>>;
51
52    /// Get the current position in the stream
53    fn position(&self) -> Position;
54
55    /// Reset the composer state
56    fn reset(&mut self);
57}
58
59/// An optimized composer that reduces allocations
60pub struct ReducedAllocComposer {
61    parser: BasicParser,
62    position: Position,
63    /// Store anchors using Rc for cheap cloning
64    anchors: HashMap<String, Rc<OptimizedValue>>,
65    limits: Limits,
66    resource_tracker: ResourceTracker,
67    alias_expansion_stack: Vec<String>,
68    current_depth: usize,
69}
70
71impl ReducedAllocComposer {
72    /// Create a new optimized composer
73    pub fn new(input: String) -> Self {
74        Self::with_limits(input, Limits::default())
75    }
76
77    /// Create a new optimized composer with custom limits
78    pub fn with_limits(input: String, limits: Limits) -> Self {
79        Self {
80            parser: BasicParser::with_limits(input, limits.clone()),
81            position: Position::new(),
82            anchors: HashMap::new(),
83            limits,
84            resource_tracker: ResourceTracker::new(),
85            alias_expansion_stack: Vec::new(),
86            current_depth: 0,
87        }
88    }
89
90    /// Compose a node from events with reduced allocations
91    fn compose_node(&mut self) -> Result<Option<OptimizedValue>> {
92        if !self.parser.check_event() {
93            return Ok(None);
94        }
95
96        let Some(event) = self.parser.get_event()? else {
97            return Ok(None);
98        };
99
100        self.position = event.position;
101
102        match event.event_type {
103            EventType::StreamStart | EventType::StreamEnd => self.compose_node(),
104
105            EventType::DocumentStart { .. } => self.compose_node(),
106
107            EventType::DocumentEnd { .. } => Ok(None),
108
109            EventType::Scalar {
110                value,
111                anchor,
112                style,
113                ..
114            } => {
115                let scalar_value = self.compose_scalar_optimized(value, style)?;
116
117                // Store anchor if present - use Rc for cheap cloning
118                if let Some(anchor_name) = anchor {
119                    self.resource_tracker.add_anchor(&self.limits)?;
120                    self.anchors
121                        .insert(anchor_name, Rc::new(scalar_value.clone()));
122                }
123
124                Ok(Some(scalar_value))
125            }
126
127            EventType::SequenceStart { anchor, .. } => {
128                let sequence = self.compose_sequence()?;
129
130                // Store anchor if present
131                if let Some(anchor_name) = anchor {
132                    if let Some(ref seq) = sequence {
133                        self.resource_tracker.add_anchor(&self.limits)?;
134                        self.anchors.insert(anchor_name, Rc::new(seq.clone()));
135                    }
136                }
137
138                Ok(sequence)
139            }
140
141            EventType::MappingStart { anchor, .. } => {
142                let mapping = self.compose_mapping()?;
143
144                // Store anchor if present
145                if let Some(anchor_name) = anchor {
146                    if let Some(ref map) = mapping {
147                        self.resource_tracker.add_anchor(&self.limits)?;
148                        self.anchors.insert(anchor_name, Rc::new(map.clone()));
149                    }
150                }
151
152                Ok(mapping)
153            }
154
155            EventType::SequenceEnd | EventType::MappingEnd => Ok(None),
156
157            EventType::Alias { anchor } => {
158                // Check for cyclic references
159                if self.alias_expansion_stack.contains(&anchor) {
160                    return Err(Error::construction(
161                        event.position,
162                        format!("Cyclic alias reference detected: '{}'", anchor),
163                    ));
164                }
165
166                // Check alias expansion depth limit BEFORE pushing
167                if self.alias_expansion_stack.len() >= self.limits.max_alias_depth {
168                    return Err(Error::construction(
169                        event.position,
170                        format!(
171                            "Maximum alias expansion depth {} exceeded",
172                            self.limits.max_alias_depth
173                        ),
174                    ));
175                }
176
177                // Track alias expansion
178                self.resource_tracker.enter_alias(&self.limits)?;
179                self.alias_expansion_stack.push(anchor.clone());
180
181                // Look up the anchor - use Rc clone for efficiency
182                let result = match self.anchors.get(&anchor) {
183                    Some(value) => {
184                        // Check if the resolved value's structure depth would exceed alias depth limit
185                        let structure_depth = calculate_optimized_structure_depth(value);
186                        if structure_depth > self.limits.max_alias_depth {
187                            return Err(Error::construction(
188                                event.position,
189                                format!(
190                                    "Alias '{}' creates structure with depth {} exceeding max_alias_depth {}",
191                                    anchor, structure_depth, self.limits.max_alias_depth
192                                ),
193                            ));
194                        }
195
196                        // Rc clone is very cheap - just increments reference count
197                        let cloned = (**value).clone();
198                        self.resource_tracker
199                            .add_complexity(&self.limits, calculate_complexity(&cloned)?)?;
200                        Ok(Some(cloned))
201                    }
202                    None => Err(Error::construction(
203                        event.position,
204                        format!("Unknown anchor '{}'", anchor),
205                    )),
206                };
207
208                // Clean up tracking
209                self.alias_expansion_stack.pop();
210                self.resource_tracker.exit_alias();
211
212                result
213            }
214        }
215    }
216
217    /// Compose a scalar value with optimization
218    fn compose_scalar_optimized(
219        &self,
220        value: String,
221        style: ScalarStyle,
222    ) -> Result<OptimizedValue> {
223        // If explicitly quoted, always treat as string
224        match style {
225            ScalarStyle::SingleQuoted | ScalarStyle::DoubleQuoted => {
226                return Ok(OptimizedValue::string(value));
227            }
228            _ => {}
229        }
230
231        // Type resolution for unquoted scalars
232        if value.is_empty() {
233            return Ok(OptimizedValue::string(value));
234        }
235
236        // Try integer parsing
237        if let Ok(int_value) = value.parse::<i64>() {
238            return Ok(OptimizedValue::Int(int_value));
239        }
240
241        // Try float parsing
242        if let Ok(float_value) = value.parse::<f64>() {
243            return Ok(OptimizedValue::Float(float_value));
244        }
245
246        // Try boolean parsing
247        match value.to_lowercase().as_str() {
248            "true" | "yes" | "on" => return Ok(OptimizedValue::Bool(true)),
249            "false" | "no" | "off" => return Ok(OptimizedValue::Bool(false)),
250            "null" | "~" => return Ok(OptimizedValue::Null),
251            _ => {}
252        }
253
254        // Default to string
255        Ok(OptimizedValue::string(value))
256    }
257
258    /// Compose a sequence with reduced allocations
259    fn compose_sequence(&mut self) -> Result<Option<OptimizedValue>> {
260        self.current_depth += 1;
261        self.resource_tracker
262            .check_depth(&self.limits, self.current_depth)?;
263
264        let mut sequence = Vec::new();
265
266        while self.parser.check_event() {
267            if let Ok(Some(event)) = self.parser.peek_event() {
268                if matches!(event.event_type, EventType::SequenceEnd) {
269                    self.parser.get_event()?;
270                    break;
271                } else if matches!(
272                    event.event_type,
273                    EventType::DocumentEnd { .. }
274                        | EventType::DocumentStart { .. }
275                        | EventType::StreamEnd
276                ) {
277                    break;
278                }
279            }
280
281            if let Some(node) = self.compose_node()? {
282                self.resource_tracker.add_collection_item(&self.limits)?;
283                self.resource_tracker.add_complexity(&self.limits, 1)?;
284                sequence.push(node);
285            } else {
286                break;
287            }
288        }
289
290        self.current_depth -= 1;
291        Ok(Some(OptimizedValue::sequence_with(sequence)))
292    }
293
294    /// Compose a mapping with reduced allocations
295    fn compose_mapping(&mut self) -> Result<Option<OptimizedValue>> {
296        self.current_depth += 1;
297        self.resource_tracker
298            .check_depth(&self.limits, self.current_depth)?;
299
300        let mut mapping = IndexMap::new();
301
302        while self.parser.check_event() {
303            if let Ok(Some(event)) = self.parser.peek_event() {
304                if matches!(event.event_type, EventType::MappingEnd) {
305                    self.parser.get_event()?;
306                    break;
307                } else if matches!(
308                    event.event_type,
309                    EventType::DocumentEnd { .. }
310                        | EventType::DocumentStart { .. }
311                        | EventType::StreamEnd
312                ) {
313                    break;
314                }
315            }
316
317            let Some(key) = self.compose_node()? else {
318                break;
319            };
320
321            let value = self.compose_node()?.unwrap_or(OptimizedValue::Null);
322
323            // Check for merge key
324            if let OptimizedValue::String(key_str) = &key {
325                if key_str.as_str() == "<<" {
326                    self.process_merge_key(&mut mapping, &value)?;
327                    continue;
328                }
329            }
330
331            self.resource_tracker.add_collection_item(&self.limits)?;
332            self.resource_tracker.add_complexity(&self.limits, 2)?;
333
334            mapping.insert(key, value);
335        }
336
337        self.current_depth -= 1;
338        Ok(Some(OptimizedValue::mapping_with(
339            mapping.into_iter().collect(),
340        )))
341    }
342
343    /// Process a merge key by merging values into the current mapping
344    fn process_merge_key(
345        &self,
346        mapping: &mut IndexMap<OptimizedValue, OptimizedValue>,
347        merge_value: &OptimizedValue,
348    ) -> Result<()> {
349        match merge_value {
350            // Single mapping to merge
351            OptimizedValue::Mapping(source_map) => {
352                for (key, value) in source_map.iter() {
353                    // Only insert if key doesn't already exist
354                    mapping.entry(key.clone()).or_insert_with(|| value.clone());
355                }
356            }
357
358            // Sequence of mappings to merge
359            OptimizedValue::Sequence(sources) => {
360                for source in sources.iter() {
361                    if let OptimizedValue::Mapping(source_map) = source {
362                        for (key, value) in source_map.iter() {
363                            mapping.entry(key.clone()).or_insert_with(|| value.clone());
364                        }
365                    } else {
366                        return Err(Error::construction(
367                            self.position,
368                            "Merge key sequence can only contain mappings",
369                        ));
370                    }
371                }
372            }
373
374            _ => {
375                return Err(Error::construction(
376                    self.position,
377                    "Merge key value must be a mapping or sequence of mappings",
378                ));
379            }
380        }
381
382        Ok(())
383    }
384}
385
386impl OptimizedComposer for ReducedAllocComposer {
387    fn check_document(&self) -> bool {
388        if let Ok(Some(event)) = self.parser.peek_event() {
389            !matches!(event.event_type, EventType::StreamEnd)
390        } else {
391            false
392        }
393    }
394
395    fn compose_document(&mut self) -> Result<Option<OptimizedValue>> {
396        if let Some(error) = self.parser.take_scanning_error() {
397            return Err(error);
398        }
399
400        // Skip any leading document start events
401        while let Ok(Some(event)) = self.parser.peek_event() {
402            if matches!(event.event_type, EventType::DocumentStart { .. }) {
403                self.parser.get_event()?;
404            } else {
405                break;
406            }
407        }
408
409        let document = self.compose_node()?;
410
411        // Skip any document end event
412        while let Ok(Some(event)) = self.parser.peek_event() {
413            if matches!(event.event_type, EventType::DocumentEnd { .. }) {
414                self.parser.get_event()?;
415            } else {
416                break;
417            }
418        }
419
420        Ok(document)
421    }
422
423    fn position(&self) -> Position {
424        self.position
425    }
426
427    fn reset(&mut self) {
428        self.position = Position::new();
429        self.anchors.clear();
430        self.resource_tracker.reset();
431        self.alias_expansion_stack.clear();
432        self.current_depth = 0;
433    }
434}
435
436/// Calculate complexity score for a value
437fn calculate_complexity(value: &OptimizedValue) -> Result<usize> {
438    let mut complexity = 1usize;
439
440    match value {
441        OptimizedValue::Sequence(seq) => {
442            complexity = complexity.saturating_add(seq.len());
443            for item in seq.iter() {
444                complexity = complexity.saturating_add(calculate_complexity(item)?);
445            }
446        }
447        OptimizedValue::Mapping(map) => {
448            complexity = complexity.saturating_add(map.len().saturating_mul(2));
449            for (key, val) in map.iter() {
450                complexity = complexity.saturating_add(calculate_complexity(key)?);
451                complexity = complexity.saturating_add(calculate_complexity(val)?);
452            }
453        }
454        _ => {} // Scalars have complexity 1
455    }
456
457    Ok(complexity)
458}
459
460#[cfg(test)]
461mod tests {
462    use super::*;
463
464    #[test]
465    fn test_optimized_scalar() {
466        let mut composer = ReducedAllocComposer::new("42".to_string());
467        let result = composer.compose_document().unwrap().unwrap();
468        assert_eq!(result, OptimizedValue::Int(42));
469    }
470
471    #[test]
472    fn test_optimized_sequence() {
473        let mut composer = ReducedAllocComposer::new("[1, 2, 3]".to_string());
474        let result = composer.compose_document().unwrap().unwrap();
475
476        if let OptimizedValue::Sequence(seq) = result {
477            assert_eq!(seq.len(), 3);
478        } else {
479            panic!("Expected sequence");
480        }
481    }
482
483    #[test]
484    fn test_anchor_rc_sharing() {
485        let yaml = r#"
486base: &base
487  value: 42
488ref1: *base
489ref2: *base
490"#;
491        let mut composer = ReducedAllocComposer::new(yaml.to_string());
492        let _result = composer.compose_document().unwrap().unwrap();
493
494        // The anchors should use Rc, so cloning should be cheap
495        assert!(composer.anchors.len() > 0);
496    }
497}