Skip to main content

rust_yaml/
composer_optimized.rs

1//! Optimized YAML composer that reduces allocations and cloning
2//!
3//! This module provides an optimized composer implementation that
4//! minimizes memory allocations and unnecessary cloning operations.
5
6use crate::{
7    BasicParser, Error, Limits, Parser, Position, ResourceTracker, Result,
8    parser::{EventType, ScalarStyle},
9    zero_copy_value::OptimizedValue,
10};
11use indexmap::IndexMap;
12use std::collections::HashMap;
13use std::rc::Rc;
14
15/// Iterative DFS — see [`crate::composer::calculate_structure_depth`] (#16).
16fn calculate_optimized_structure_depth(value: &OptimizedValue) -> usize {
17    let mut max_depth: usize = 1;
18    let mut stack: Vec<(&OptimizedValue, usize)> = vec![(value, 1)];
19    while let Some((node, depth)) = stack.pop() {
20        if depth > max_depth {
21            max_depth = depth;
22        }
23        let next = depth.saturating_add(1);
24        match node {
25            OptimizedValue::Sequence(seq) => {
26                for item in seq.iter() {
27                    stack.push((item, next));
28                }
29            }
30            OptimizedValue::Mapping(map) => {
31                for (_, v) in map.iter() {
32                    stack.push((v, next));
33                }
34            }
35            _ => {}
36        }
37    }
38    max_depth
39}
40
41/// Trait for optimized composers
42pub trait OptimizedComposer {
43    /// Check if there are more documents available
44    fn check_document(&self) -> bool;
45
46    /// Compose the next document with minimal allocations
47    fn compose_document(&mut self) -> Result<Option<OptimizedValue>>;
48
49    /// Get the current position in the stream
50    fn position(&self) -> Position;
51
52    /// Reset the composer state
53    fn reset(&mut self);
54}
55
56/// An optimized composer that reduces allocations
57pub struct ReducedAllocComposer {
58    parser: BasicParser,
59    position: Position,
60    /// Store anchors using Rc for cheap cloning
61    anchors: HashMap<String, Rc<OptimizedValue>>,
62    limits: Limits,
63    resource_tracker: ResourceTracker,
64    alias_expansion_stack: Vec<String>,
65    current_depth: usize,
66    /// Active YAML spec version for the current document.
67    yaml_version: crate::version::YamlVersion,
68}
69
70impl ReducedAllocComposer {
71    /// Create a new optimized composer
72    pub fn new(input: String) -> Self {
73        Self::with_limits(input, Limits::default())
74    }
75
76    /// Create a new optimized composer with custom limits
77    pub fn with_limits(input: String, limits: Limits) -> Self {
78        Self {
79            parser: BasicParser::with_limits(input, limits.clone()),
80            position: Position::new(),
81            anchors: HashMap::new(),
82            limits,
83            resource_tracker: ResourceTracker::new(),
84            alias_expansion_stack: Vec::new(),
85            current_depth: 0,
86            yaml_version: crate::version::YamlVersion::default(),
87        }
88    }
89
90    /// Compose a node from events with reduced allocations
91    fn compose_node(&mut self) -> Result<Option<OptimizedValue>> {
92        if !self.parser.check_event() {
93            return Ok(None);
94        }
95
96        let Some(event) = self.parser.get_event()? else {
97            return Ok(None);
98        };
99
100        self.position = event.position;
101
102        match event.event_type {
103            EventType::StreamStart | EventType::StreamEnd => self.compose_node(),
104
105            EventType::DocumentStart { .. } => self.compose_node(),
106
107            EventType::DocumentEnd { .. } => Ok(None),
108
109            EventType::Scalar {
110                value,
111                anchor,
112                style,
113                ..
114            } => {
115                let scalar_value = self.compose_scalar_optimized(value, style)?;
116
117                // Store anchor if present - use Rc for cheap cloning
118                if let Some(anchor_name) = anchor {
119                    self.resource_tracker.add_anchor(&self.limits)?;
120                    self.anchors
121                        .insert(anchor_name, Rc::new(scalar_value.clone()));
122                }
123
124                Ok(Some(scalar_value))
125            }
126
127            EventType::SequenceStart { anchor, .. } => {
128                let sequence = self.compose_sequence()?;
129
130                // Store anchor if present
131                if let Some(anchor_name) = anchor {
132                    if let Some(ref seq) = sequence {
133                        self.resource_tracker.add_anchor(&self.limits)?;
134                        self.anchors.insert(anchor_name, Rc::new(seq.clone()));
135                    }
136                }
137
138                Ok(sequence)
139            }
140
141            EventType::MappingStart { anchor, .. } => {
142                let mapping = self.compose_mapping()?;
143
144                // Store anchor if present
145                if let Some(anchor_name) = anchor {
146                    if let Some(ref map) = mapping {
147                        self.resource_tracker.add_anchor(&self.limits)?;
148                        self.anchors.insert(anchor_name, Rc::new(map.clone()));
149                    }
150                }
151
152                Ok(mapping)
153            }
154
155            EventType::SequenceEnd | EventType::MappingEnd => Ok(None),
156
157            EventType::Alias { anchor } => {
158                // Check for cyclic references
159                if self.alias_expansion_stack.contains(&anchor) {
160                    return Err(Error::construction(
161                        event.position,
162                        format!("Cyclic alias reference detected: '{}'", anchor),
163                    ));
164                }
165
166                // Check alias expansion depth limit BEFORE pushing
167                if self.alias_expansion_stack.len() >= self.limits.max_alias_depth {
168                    return Err(Error::construction(
169                        event.position,
170                        format!(
171                            "Maximum alias expansion depth {} exceeded",
172                            self.limits.max_alias_depth
173                        ),
174                    ));
175                }
176
177                // Track alias expansion
178                self.resource_tracker.enter_alias(&self.limits)?;
179                self.alias_expansion_stack.push(anchor.clone());
180
181                // Look up the anchor - use Rc clone for efficiency
182                let result = match self.anchors.get(&anchor) {
183                    Some(value) => {
184                        // Check if the resolved value's structure depth would exceed alias depth limit
185                        let structure_depth = calculate_optimized_structure_depth(value);
186                        if structure_depth > self.limits.max_alias_depth {
187                            return Err(Error::construction(
188                                event.position,
189                                format!(
190                                    "Alias '{}' creates structure with depth {} exceeding max_alias_depth {}",
191                                    anchor, structure_depth, self.limits.max_alias_depth
192                                ),
193                            ));
194                        }
195
196                        // Rc clone is very cheap - just increments reference count
197                        let cloned = (**value).clone();
198                        let nodes = calculate_complexity(&cloned)?;
199                        // Cap cumulative alias materialization BEFORE
200                        // committing the clone (#15 billion-laughs gap).
201                        self.resource_tracker
202                            .add_alias_materialization(&self.limits, nodes)?;
203                        self.resource_tracker.add_complexity(&self.limits, nodes)?;
204                        Ok(Some(cloned))
205                    }
206                    None => Err(Error::construction(
207                        event.position,
208                        format!("Unknown anchor '{}'", anchor),
209                    )),
210                };
211
212                // Clean up tracking
213                self.alias_expansion_stack.pop();
214                self.resource_tracker.exit_alias();
215
216                result
217            }
218        }
219    }
220
221    /// Compose a scalar value with optimization
222    fn compose_scalar_optimized(
223        &self,
224        value: String,
225        style: ScalarStyle,
226    ) -> Result<OptimizedValue> {
227        if matches!(style, ScalarStyle::SingleQuoted | ScalarStyle::DoubleQuoted) {
228            return Ok(OptimizedValue::string(value));
229        }
230
231        Ok(
232            match crate::resolver::resolve_plain_scalar(&value, self.yaml_version) {
233                crate::resolver::PlainScalarType::Null => OptimizedValue::Null,
234                crate::resolver::PlainScalarType::Bool(b) => OptimizedValue::Bool(b),
235                crate::resolver::PlainScalarType::Int(i) => OptimizedValue::Int(i),
236                crate::resolver::PlainScalarType::Float(f) => OptimizedValue::Float(f),
237                crate::resolver::PlainScalarType::Str => OptimizedValue::string(value),
238                crate::resolver::PlainScalarType::Value => {
239                    return Err(crate::resolver::value_tag_error(self.position));
240                }
241            },
242        )
243    }
244
245    /// Compose a sequence with reduced allocations
246    fn compose_sequence(&mut self) -> Result<Option<OptimizedValue>> {
247        self.current_depth += 1;
248        self.resource_tracker
249            .check_depth(&self.limits, self.current_depth)?;
250
251        let mut sequence = Vec::new();
252
253        while self.parser.check_event() {
254            if let Ok(Some(event)) = self.parser.peek_event() {
255                if matches!(event.event_type, EventType::SequenceEnd) {
256                    self.parser.get_event()?;
257                    break;
258                } else if matches!(
259                    event.event_type,
260                    EventType::DocumentEnd { .. }
261                        | EventType::DocumentStart { .. }
262                        | EventType::StreamEnd
263                ) {
264                    break;
265                }
266            }
267
268            if let Some(node) = self.compose_node()? {
269                self.resource_tracker.add_collection_item(&self.limits)?;
270                self.resource_tracker.add_complexity(&self.limits, 1)?;
271                sequence.push(node);
272            } else {
273                break;
274            }
275        }
276
277        self.current_depth -= 1;
278        Ok(Some(OptimizedValue::sequence_with(sequence)))
279    }
280
281    /// Compose a mapping with reduced allocations
282    fn compose_mapping(&mut self) -> Result<Option<OptimizedValue>> {
283        self.current_depth += 1;
284        self.resource_tracker
285            .check_depth(&self.limits, self.current_depth)?;
286
287        let mut mapping = IndexMap::new();
288
289        while self.parser.check_event() {
290            if let Ok(Some(event)) = self.parser.peek_event() {
291                if matches!(event.event_type, EventType::MappingEnd) {
292                    self.parser.get_event()?;
293                    break;
294                } else if matches!(
295                    event.event_type,
296                    EventType::DocumentEnd { .. }
297                        | EventType::DocumentStart { .. }
298                        | EventType::StreamEnd
299                ) {
300                    break;
301                }
302            }
303
304            let Some(key) = self.compose_node()? else {
305                break;
306            };
307
308            let value = self.compose_node()?.unwrap_or(OptimizedValue::Null);
309
310            // Check for merge key
311            if let OptimizedValue::String(key_str) = &key {
312                if key_str.as_str() == "<<" {
313                    self.process_merge_key(&mut mapping, &value)?;
314                    continue;
315                }
316            }
317
318            self.resource_tracker.add_collection_item(&self.limits)?;
319            self.resource_tracker.add_complexity(&self.limits, 2)?;
320
321            mapping.insert(key, value);
322        }
323
324        self.current_depth -= 1;
325        Ok(Some(OptimizedValue::mapping_with(
326            mapping.into_iter().collect(),
327        )))
328    }
329
330    /// Process a merge key by merging values into the current mapping
331    fn process_merge_key(
332        &self,
333        mapping: &mut IndexMap<OptimizedValue, OptimizedValue>,
334        merge_value: &OptimizedValue,
335    ) -> Result<()> {
336        match merge_value {
337            // Single mapping to merge
338            OptimizedValue::Mapping(source_map) => {
339                for (key, value) in source_map.iter() {
340                    // Only insert if key doesn't already exist
341                    mapping.entry(key.clone()).or_insert_with(|| value.clone());
342                }
343            }
344
345            // Sequence of mappings to merge
346            OptimizedValue::Sequence(sources) => {
347                for source in sources.iter() {
348                    if let OptimizedValue::Mapping(source_map) = source {
349                        for (key, value) in source_map.iter() {
350                            mapping.entry(key.clone()).or_insert_with(|| value.clone());
351                        }
352                    } else {
353                        return Err(Error::construction(
354                            self.position,
355                            "Merge key sequence can only contain mappings",
356                        ));
357                    }
358                }
359            }
360
361            _ => {
362                return Err(Error::construction(
363                    self.position,
364                    "Merge key value must be a mapping or sequence of mappings",
365                ));
366            }
367        }
368
369        Ok(())
370    }
371}
372
373impl OptimizedComposer for ReducedAllocComposer {
374    fn check_document(&self) -> bool {
375        if let Ok(Some(event)) = self.parser.peek_event() {
376            !matches!(event.event_type, EventType::StreamEnd)
377        } else {
378            false
379        }
380    }
381
382    fn compose_document(&mut self) -> Result<Option<OptimizedValue>> {
383        if let Some(error) = self.parser.take_scanning_error() {
384            return Err(error);
385        }
386
387        // Consume document start events, capturing the YAML version directive.
388        while let Ok(Some(event)) = self.parser.peek_event() {
389            if let EventType::DocumentStart { version, .. } = &event.event_type {
390                self.yaml_version = version
391                    .map(|(maj, min)| crate::version::YamlVersion::from_directive(maj, min))
392                    .unwrap_or_default();
393                self.parser.get_event()?;
394            } else {
395                break;
396            }
397        }
398
399        let document = self.compose_node()?;
400
401        // Skip any document end event
402        while let Ok(Some(event)) = self.parser.peek_event() {
403            if matches!(event.event_type, EventType::DocumentEnd { .. }) {
404                self.parser.get_event()?;
405            } else {
406                break;
407            }
408        }
409
410        Ok(document)
411    }
412
413    fn position(&self) -> Position {
414        self.position
415    }
416
417    fn reset(&mut self) {
418        self.position = Position::new();
419        self.anchors.clear();
420        self.resource_tracker.reset();
421        self.alias_expansion_stack.clear();
422        self.current_depth = 0;
423    }
424}
425
426/// Calculate complexity score for a value. Iterative for stack safety (#16).
427fn calculate_complexity(value: &OptimizedValue) -> Result<usize> {
428    let mut total: usize = 0;
429    let mut stack: Vec<&OptimizedValue> = vec![value];
430    while let Some(node) = stack.pop() {
431        match node {
432            OptimizedValue::Sequence(seq) => {
433                total = total.saturating_add(1usize.saturating_add(seq.len()));
434                for item in seq.iter() {
435                    stack.push(item);
436                }
437            }
438            OptimizedValue::Mapping(map) => {
439                total = total.saturating_add(1usize.saturating_add(map.len().saturating_mul(2)));
440                for (k, v) in map.iter() {
441                    stack.push(k);
442                    stack.push(v);
443                }
444            }
445            _ => total = total.saturating_add(1),
446        }
447    }
448    Ok(total)
449}
450
451#[cfg(test)]
452mod tests {
453    use super::*;
454
455    #[test]
456    fn test_optimized_scalar() {
457        let mut composer = ReducedAllocComposer::new("42".to_string());
458        let result = composer.compose_document().unwrap().unwrap();
459        assert_eq!(result, OptimizedValue::Int(42));
460    }
461
462    #[test]
463    fn test_optimized_sequence() {
464        let mut composer = ReducedAllocComposer::new("[1, 2, 3]".to_string());
465        let result = composer.compose_document().unwrap().unwrap();
466
467        if let OptimizedValue::Sequence(seq) = result {
468            assert_eq!(seq.len(), 3);
469        } else {
470            panic!("Expected sequence");
471        }
472    }
473
474    #[test]
475    fn test_anchor_rc_sharing() {
476        let yaml = r#"
477base: &base
478  value: 42
479ref1: *base
480ref2: *base
481"#;
482        let mut composer = ReducedAllocComposer::new(yaml.to_string());
483        let _result = composer.compose_document().unwrap().unwrap();
484
485        // The anchors should use Rc, so cloning should be cheap
486        assert!(composer.anchors.len() > 0);
487    }
488
489    // ---- iterative helpers (#16) ----
490
491    #[test]
492    fn optimized_structure_depth_scalar_and_empty_is_one() {
493        assert_eq!(
494            calculate_optimized_structure_depth(&OptimizedValue::Null),
495            1
496        );
497        assert_eq!(
498            calculate_optimized_structure_depth(&OptimizedValue::Int(7)),
499            1
500        );
501        assert_eq!(
502            calculate_optimized_structure_depth(&OptimizedValue::Sequence(Rc::new(Vec::new()))),
503            1
504        );
505        assert_eq!(
506            calculate_optimized_structure_depth(&OptimizedValue::Mapping(Rc::new(IndexMap::new()))),
507            1
508        );
509    }
510
511    #[test]
512    fn optimized_structure_depth_nested_sequence() {
513        // [[[1]]] → depth 4
514        let inner = OptimizedValue::Sequence(Rc::new(vec![OptimizedValue::Int(1)]));
515        let mid = OptimizedValue::Sequence(Rc::new(vec![inner]));
516        let outer = OptimizedValue::Sequence(Rc::new(vec![mid]));
517        assert_eq!(calculate_optimized_structure_depth(&outer), 4);
518    }
519
520    #[test]
521    fn optimized_complexity_scalar_is_one() {
522        assert_eq!(calculate_complexity(&OptimizedValue::Null).unwrap(), 1);
523        assert_eq!(calculate_complexity(&OptimizedValue::Int(42)).unwrap(), 1);
524    }
525
526    #[test]
527    fn optimized_complexity_sequence_charges_len_plus_one_plus_children() {
528        // [1, 2, 3] → 1 + 3 + 3*1 = 7
529        let seq = OptimizedValue::Sequence(Rc::new(vec![
530            OptimizedValue::Int(1),
531            OptimizedValue::Int(2),
532            OptimizedValue::Int(3),
533        ]));
534        assert_eq!(calculate_complexity(&seq).unwrap(), 7);
535    }
536
537    #[test]
538    fn optimized_complexity_mapping_charges_two_per_entry_plus_one_plus_children() {
539        // { k1:1, k2:2 } → 1 + 2*2 + 4*1 = 9
540        let mut map = IndexMap::new();
541        map.insert(
542            OptimizedValue::String(Rc::new("k1".to_string())),
543            OptimizedValue::Int(1),
544        );
545        map.insert(
546            OptimizedValue::String(Rc::new("k2".to_string())),
547            OptimizedValue::Int(2),
548        );
549        assert_eq!(
550            calculate_complexity(&OptimizedValue::Mapping(Rc::new(map))).unwrap(),
551            9
552        );
553    }
554
555    // ---- alias materialization cap (#15) on ReducedAllocComposer ----
556
557    fn permissive_with_alias_cap(cap: usize) -> Limits {
558        Limits {
559            max_total_alias_nodes: cap,
560            ..Limits::permissive()
561        }
562    }
563
564    #[test]
565    fn optimized_alias_materialization_cap_fires() {
566        let input = r#"
567a: &a [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
568b: [*a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a]
569"#;
570        let mut composer =
571            ReducedAllocComposer::with_limits(input.to_string(), permissive_with_alias_cap(50));
572        let err = composer.compose_document().expect_err("cap should reject");
573        let msg = err.to_string();
574        assert!(
575            msg.contains("alias") || msg.contains("materializ") || msg.contains("limit"),
576            "expected materialization-cap error, got: {msg}"
577        );
578    }
579
580    #[test]
581    fn optimized_alias_below_cap_succeeds() {
582        let input = r#"
583a: &a [1, 2, 3]
584b: [*a, *a]
585"#;
586        let mut composer =
587            ReducedAllocComposer::with_limits(input.to_string(), permissive_with_alias_cap(10_000));
588        let result = composer.compose_document().unwrap().unwrap();
589        assert!(matches!(result, OptimizedValue::Mapping(_)));
590    }
591}