Skip to main content

rust_yaml/
composer_borrowed.rs

1//! Zero-copy YAML composer for converting events to borrowed nodes
2//!
3//! This module provides a composer that minimizes allocations by using
4//! borrowed data structures where possible.
5
6use crate::{
7    BasicParser, Error, Limits, Parser, Position, ResourceTracker, Result,
8    parser::{EventType, ScalarStyle},
9    value_borrowed::BorrowedValue,
10};
11use indexmap::IndexMap;
12use std::collections::HashMap;
13
14/// Calculate the maximum nesting depth of a borrowed value structure
15/// Iterative DFS — see [`crate::composer::calculate_structure_depth`] (#16).
16fn calculate_borrowed_structure_depth(value: &BorrowedValue) -> usize {
17    let mut max_depth: usize = 1;
18    let mut stack: Vec<(&BorrowedValue, usize)> = vec![(value, 1)];
19    while let Some((node, depth)) = stack.pop() {
20        if depth > max_depth {
21            max_depth = depth;
22        }
23        let next = depth.saturating_add(1);
24        match node {
25            BorrowedValue::Sequence(seq) => {
26                for item in seq {
27                    stack.push((item, next));
28                }
29            }
30            BorrowedValue::Mapping(map) => {
31                for (_, v) in map {
32                    stack.push((v, next));
33                }
34            }
35            _ => {}
36        }
37    }
38    max_depth
39}
40
41/// Cost of materialising a borrowed value, used to charge alias expansion
42/// against `max_total_alias_nodes` (#15). Iterative for stack safety (#16).
43fn calculate_borrowed_complexity(value: &BorrowedValue) -> usize {
44    let mut total: usize = 0;
45    let mut stack: Vec<&BorrowedValue> = vec![value];
46    while let Some(node) = stack.pop() {
47        match node {
48            BorrowedValue::Sequence(seq) => {
49                total = total.saturating_add(1usize.saturating_add(seq.len()));
50                for item in seq {
51                    stack.push(item);
52                }
53            }
54            BorrowedValue::Mapping(map) => {
55                total = total.saturating_add(1usize.saturating_add(map.len().saturating_mul(2)));
56                for (k, v) in map {
57                    stack.push(k);
58                    stack.push(v);
59                }
60            }
61            _ => total = total.saturating_add(1),
62        }
63    }
64    total
65}
66
67/// Trait for zero-copy YAML composers
68pub trait BorrowedComposer<'a> {
69    /// Check if there are more documents available
70    fn check_document(&self) -> bool;
71
72    /// Compose the next document with minimal allocations
73    fn compose_document(&mut self) -> Result<Option<BorrowedValue<'a>>>;
74
75    /// Get the current position in the stream
76    fn position(&self) -> Position;
77
78    /// Reset the composer state
79    fn reset(&mut self);
80}
81
82/// A zero-copy composer implementation
83pub struct ZeroCopyComposer<'a> {
84    parser: BasicParser,
85    position: Position,
86    /// Store anchors as borrowed values when possible
87    anchors: HashMap<&'a str, BorrowedValue<'a>>,
88    limits: Limits,
89    resource_tracker: ResourceTracker,
90    alias_expansion_stack: Vec<&'a str>,
91    current_depth: usize,
92    /// Reference to the input string for borrowing
93    input: &'a str,
94    /// Active YAML spec version for the current document.
95    yaml_version: crate::version::YamlVersion,
96}
97
98impl<'a> ZeroCopyComposer<'a> {
99    /// Create a new zero-copy composer
100    pub fn new(input: &'a str) -> Self {
101        Self::with_limits(input, Limits::default())
102    }
103
104    /// Create a new zero-copy composer with custom limits
105    pub fn with_limits(input: &'a str, limits: Limits) -> Self {
106        Self {
107            parser: BasicParser::with_limits(input.to_string(), limits.clone()),
108            position: Position::new(),
109            anchors: HashMap::new(),
110            limits,
111            resource_tracker: ResourceTracker::new(),
112            alias_expansion_stack: Vec::new(),
113            current_depth: 0,
114            input,
115            yaml_version: crate::version::YamlVersion::default(),
116        }
117    }
118
119    /// Compose a node from events with minimal allocations
120    fn compose_node(&mut self) -> Result<Option<BorrowedValue<'a>>> {
121        if !self.parser.check_event() {
122            return Ok(None);
123        }
124
125        let Some(event) = self.parser.get_event()? else {
126            return Ok(None);
127        };
128
129        self.position = event.position;
130
131        match event.event_type {
132            EventType::StreamStart | EventType::StreamEnd => self.compose_node(),
133
134            EventType::DocumentStart { .. } => self.compose_node(),
135
136            EventType::DocumentEnd { .. } => Ok(None),
137
138            EventType::Scalar {
139                value,
140                anchor,
141                style,
142                ..
143            } => {
144                let scalar_value = self.compose_scalar_borrowed(&value, style)?;
145
146                // Store anchor if present - we need to clone here unfortunately
147                if let Some(anchor_name) = anchor {
148                    // We need to leak the string to get a 'static reference
149                    // In a real implementation, we'd use an arena allocator
150                    let anchor_str = Box::leak(anchor_name.into_boxed_str());
151                    self.anchors
152                        .insert(anchor_str, scalar_value.clone_if_needed());
153                }
154
155                Ok(Some(scalar_value))
156            }
157
158            EventType::SequenceStart { anchor, .. } => {
159                let sequence = self.compose_sequence()?;
160
161                // Store anchor if present
162                if let Some(anchor_name) = anchor {
163                    if let Some(ref seq) = sequence {
164                        let anchor_str = Box::leak(anchor_name.into_boxed_str());
165                        self.anchors.insert(anchor_str, seq.clone_if_needed());
166                    }
167                }
168
169                Ok(sequence)
170            }
171
172            EventType::MappingStart { anchor, .. } => {
173                let mapping = self.compose_mapping()?;
174
175                // Store anchor if present
176                if let Some(anchor_name) = anchor {
177                    if let Some(ref map) = mapping {
178                        let anchor_str = Box::leak(anchor_name.into_boxed_str());
179                        self.anchors.insert(anchor_str, map.clone_if_needed());
180                    }
181                }
182
183                Ok(mapping)
184            }
185
186            EventType::SequenceEnd | EventType::MappingEnd => Ok(None),
187
188            EventType::Alias { anchor } => {
189                // Check for cyclic references
190                let anchor_str = anchor.as_str();
191                if self.alias_expansion_stack.iter().any(|&a| a == anchor_str) {
192                    return Err(Error::construction(
193                        event.position,
194                        format!("Cyclic alias reference detected: '{}'", anchor_str),
195                    ));
196                }
197
198                // Check alias expansion depth limit BEFORE pushing
199                if self.alias_expansion_stack.len() >= self.limits.max_alias_depth {
200                    return Err(Error::construction(
201                        event.position,
202                        format!(
203                            "Maximum alias expansion depth {} exceeded",
204                            self.limits.max_alias_depth
205                        ),
206                    ));
207                }
208
209                // Track alias expansion
210                self.resource_tracker.enter_alias(&self.limits)?;
211
212                // Look up the anchor - try to avoid cloning if possible
213                let result = match self.anchors.get(anchor_str) {
214                    Some(value) => {
215                        // Check if the resolved value's structure depth would exceed alias depth limit
216                        let structure_depth = calculate_borrowed_structure_depth(value);
217                        if structure_depth > self.limits.max_alias_depth {
218                            return Err(Error::construction(
219                                event.position,
220                                format!(
221                                    "Alias '{}' creates structure with depth {} exceeding max_alias_depth {}",
222                                    anchor_str, structure_depth, self.limits.max_alias_depth
223                                ),
224                            ));
225                        }
226
227                        // Cap cumulative alias materialization BEFORE
228                        // committing the clone (#15 billion-laughs gap).
229                        let nodes = calculate_borrowed_complexity(value);
230                        self.resource_tracker
231                            .add_alias_materialization(&self.limits, nodes)?;
232                        self.resource_tracker.add_complexity(&self.limits, nodes)?;
233                        Ok(Some(value.clone_if_needed()))
234                    }
235                    None => Err(Error::construction(
236                        event.position,
237                        format!("Unknown anchor '{}'", anchor_str),
238                    )),
239                };
240
241                self.resource_tracker.exit_alias();
242                result
243            }
244        }
245    }
246
247    /// Compose a scalar value with borrowing when possible
248    fn compose_scalar_borrowed(
249        &self,
250        value: &str,
251        style: ScalarStyle,
252    ) -> Result<BorrowedValue<'a>> {
253        // Explicitly-quoted scalars are always strings.
254        if matches!(style, ScalarStyle::SingleQuoted | ScalarStyle::DoubleQuoted) {
255            return Ok(BorrowedValue::owned_string(value.to_string()));
256        }
257
258        Ok(
259            match crate::resolver::resolve_plain_scalar(value, self.yaml_version) {
260                crate::resolver::PlainScalarType::Null => BorrowedValue::Null,
261                crate::resolver::PlainScalarType::Bool(b) => BorrowedValue::Bool(b),
262                crate::resolver::PlainScalarType::Int(i) => BorrowedValue::Int(i),
263                crate::resolver::PlainScalarType::Float(f) => BorrowedValue::Float(f),
264                crate::resolver::PlainScalarType::Str => {
265                    BorrowedValue::owned_string(value.to_string())
266                }
267                crate::resolver::PlainScalarType::Value => {
268                    return Err(crate::resolver::value_tag_error(self.position));
269                }
270            },
271        )
272    }
273
274    /// Compose a sequence with minimal allocations
275    fn compose_sequence(&mut self) -> Result<Option<BorrowedValue<'a>>> {
276        self.current_depth += 1;
277        self.resource_tracker
278            .check_depth(&self.limits, self.current_depth)?;
279
280        let mut sequence = Vec::new();
281
282        while self.parser.check_event() {
283            if let Ok(Some(event)) = self.parser.peek_event() {
284                if matches!(event.event_type, EventType::SequenceEnd) {
285                    self.parser.get_event()?;
286                    break;
287                } else if matches!(
288                    event.event_type,
289                    EventType::DocumentEnd { .. }
290                        | EventType::DocumentStart { .. }
291                        | EventType::StreamEnd
292                ) {
293                    break;
294                }
295            }
296
297            if let Some(node) = self.compose_node()? {
298                self.resource_tracker.add_collection_item(&self.limits)?;
299                self.resource_tracker.add_complexity(&self.limits, 1)?;
300                sequence.push(node);
301            } else {
302                break;
303            }
304        }
305
306        self.current_depth -= 1;
307        Ok(Some(BorrowedValue::Sequence(sequence)))
308    }
309
310    /// Compose a mapping with minimal allocations
311    fn compose_mapping(&mut self) -> Result<Option<BorrowedValue<'a>>> {
312        self.current_depth += 1;
313        self.resource_tracker
314            .check_depth(&self.limits, self.current_depth)?;
315
316        let mut mapping = IndexMap::new();
317
318        while self.parser.check_event() {
319            if let Ok(Some(event)) = self.parser.peek_event() {
320                if matches!(event.event_type, EventType::MappingEnd) {
321                    self.parser.get_event()?;
322                    break;
323                } else if matches!(
324                    event.event_type,
325                    EventType::DocumentEnd { .. }
326                        | EventType::DocumentStart { .. }
327                        | EventType::StreamEnd
328                ) {
329                    break;
330                }
331            }
332
333            let Some(key) = self.compose_node()? else {
334                break;
335            };
336
337            let value = self.compose_node()?.unwrap_or(BorrowedValue::Null);
338
339            self.resource_tracker.add_collection_item(&self.limits)?;
340            self.resource_tracker.add_complexity(&self.limits, 2)?;
341
342            mapping.insert(key, value);
343        }
344
345        self.current_depth -= 1;
346        Ok(Some(BorrowedValue::Mapping(mapping)))
347    }
348}
349
350impl<'a> BorrowedComposer<'a> for ZeroCopyComposer<'a> {
351    fn check_document(&self) -> bool {
352        if let Ok(Some(event)) = self.parser.peek_event() {
353            !matches!(event.event_type, EventType::StreamEnd)
354        } else {
355            false
356        }
357    }
358
359    fn compose_document(&mut self) -> Result<Option<BorrowedValue<'a>>> {
360        if let Some(error) = self.parser.take_scanning_error() {
361            return Err(error);
362        }
363
364        // Consume document start events, capturing the YAML version directive.
365        while let Ok(Some(event)) = self.parser.peek_event() {
366            if let EventType::DocumentStart { version, .. } = &event.event_type {
367                self.yaml_version = version
368                    .map(|(maj, min)| crate::version::YamlVersion::from_directive(maj, min))
369                    .unwrap_or_default();
370                self.parser.get_event()?;
371            } else {
372                break;
373            }
374        }
375
376        let document = self.compose_node()?;
377
378        // Skip any document end event
379        while let Ok(Some(event)) = self.parser.peek_event() {
380            if matches!(event.event_type, EventType::DocumentEnd { .. }) {
381                self.parser.get_event()?;
382            } else {
383                break;
384            }
385        }
386
387        Ok(document)
388    }
389
390    fn position(&self) -> Position {
391        self.position
392    }
393
394    fn reset(&mut self) {
395        self.position = Position::new();
396        self.anchors.clear();
397        self.resource_tracker.reset();
398        self.alias_expansion_stack.clear();
399        self.current_depth = 0;
400    }
401}
402
403#[cfg(test)]
404mod tests {
405    use super::*;
406
407    #[test]
408    fn test_zero_copy_scalar() {
409        let input = "hello world";
410        let mut composer = ZeroCopyComposer::new(input);
411        let result = composer.compose_document().unwrap().unwrap();
412
413        // Verify we got a string (currently owned due to implementation limitations)
414        if let BorrowedValue::String(cow) = result {
415            // Note: Currently returns owned strings due to implementation limitations
416            // TODO: Implement true zero-copy borrowing with arena allocator
417            assert!(matches!(cow, std::borrow::Cow::Owned(_)));
418            assert_eq!(cow.as_ref(), "hello world");
419        } else {
420            panic!("Expected string value");
421        }
422    }
423
424    #[test]
425    fn test_zero_copy_sequence() {
426        let input = "[1, 2, 3]";
427        let mut composer = ZeroCopyComposer::new(input);
428        let result = composer.compose_document().unwrap().unwrap();
429
430        if let BorrowedValue::Sequence(seq) = result {
431            assert_eq!(seq.len(), 3);
432            assert_eq!(seq[0], BorrowedValue::Int(1));
433            assert_eq!(seq[1], BorrowedValue::Int(2));
434            assert_eq!(seq[2], BorrowedValue::Int(3));
435        } else {
436            panic!("Expected sequence");
437        }
438    }
439
440    #[test]
441    fn test_zero_copy_mapping() {
442        let input = r#"{"key": "value"}"#;
443        let mut composer = ZeroCopyComposer::new(input);
444        let result = composer.compose_document().unwrap().unwrap();
445
446        if let BorrowedValue::Mapping(map) = result {
447            assert_eq!(map.len(), 1);
448            let key = BorrowedValue::owned_string("key".to_string());
449            assert!(map.contains_key(&key));
450        } else {
451            panic!("Expected mapping");
452        }
453    }
454
455    // ---- iterative helpers (#16) ----
456
457    #[test]
458    fn borrowed_structure_depth_scalar_is_one() {
459        assert_eq!(calculate_borrowed_structure_depth(&BorrowedValue::Null), 1);
460        assert_eq!(
461            calculate_borrowed_structure_depth(&BorrowedValue::Int(7)),
462            1
463        );
464        assert_eq!(
465            calculate_borrowed_structure_depth(&BorrowedValue::owned_string("x".into())),
466            1
467        );
468    }
469
470    #[test]
471    fn borrowed_structure_depth_empty_collections_is_one() {
472        assert_eq!(
473            calculate_borrowed_structure_depth(&BorrowedValue::Sequence(Vec::new())),
474            1
475        );
476        assert_eq!(
477            calculate_borrowed_structure_depth(&BorrowedValue::Mapping(IndexMap::new())),
478            1
479        );
480    }
481
482    #[test]
483    fn borrowed_structure_depth_nested_sequence() {
484        // [[[1]]] → depth 4 (outer seq + 2 nested seqs + scalar)
485        let inner = BorrowedValue::Sequence(vec![BorrowedValue::Int(1)]);
486        let mid = BorrowedValue::Sequence(vec![inner]);
487        let outer = BorrowedValue::Sequence(vec![mid]);
488        assert_eq!(calculate_borrowed_structure_depth(&outer), 4);
489    }
490
491    #[test]
492    fn borrowed_structure_depth_nested_mapping() {
493        // { k: { k: 1 } } → depth 3
494        let mut inner = IndexMap::new();
495        inner.insert(
496            BorrowedValue::owned_string("k".into()),
497            BorrowedValue::Int(1),
498        );
499        let mut outer = IndexMap::new();
500        outer.insert(
501            BorrowedValue::owned_string("k".into()),
502            BorrowedValue::Mapping(inner),
503        );
504        assert_eq!(
505            calculate_borrowed_structure_depth(&BorrowedValue::Mapping(outer)),
506            3
507        );
508    }
509
510    #[test]
511    fn borrowed_complexity_scalar_is_one() {
512        assert_eq!(calculate_borrowed_complexity(&BorrowedValue::Null), 1);
513        assert_eq!(calculate_borrowed_complexity(&BorrowedValue::Bool(true)), 1);
514        assert_eq!(calculate_borrowed_complexity(&BorrowedValue::Int(42)), 1);
515        assert_eq!(
516            calculate_borrowed_complexity(&BorrowedValue::owned_string("hi".into())),
517            1
518        );
519    }
520
521    #[test]
522    fn borrowed_complexity_sequence_charges_len_plus_one_plus_children() {
523        // Sequence with 3 scalar children: 1 (seq) + 3 (len) + 3*1 (children) = 7
524        let seq = BorrowedValue::Sequence(vec![
525            BorrowedValue::Int(1),
526            BorrowedValue::Int(2),
527            BorrowedValue::Int(3),
528        ]);
529        assert_eq!(calculate_borrowed_complexity(&seq), 7);
530    }
531
532    #[test]
533    fn borrowed_complexity_mapping_charges_two_per_entry_plus_one_plus_children() {
534        // Mapping with 2 entries (k1: v1, k2: v2): 1 + 2*2 + 4*1 = 9
535        let mut map = IndexMap::new();
536        map.insert(
537            BorrowedValue::owned_string("k1".into()),
538            BorrowedValue::Int(1),
539        );
540        map.insert(
541            BorrowedValue::owned_string("k2".into()),
542            BorrowedValue::Int(2),
543        );
544        assert_eq!(
545            calculate_borrowed_complexity(&BorrowedValue::Mapping(map)),
546            9
547        );
548    }
549
550    #[test]
551    fn borrowed_structure_depth_is_stack_safe_for_deep_input() {
552        // 100k-deep sequence would blow the stack if the helper were recursive.
553        let mut v = BorrowedValue::Int(0);
554        for _ in 0..100_000 {
555            v = BorrowedValue::Sequence(vec![v]);
556        }
557        // We don't care about the exact value, only that it doesn't overflow.
558        let depth = calculate_borrowed_structure_depth(&v);
559        assert!(depth >= 100_000);
560        // Iterative drop to avoid the recursive Drop in Vec/IndexMap.
561        drop_borrowed_iteratively(v);
562    }
563
564    #[test]
565    fn borrowed_complexity_is_stack_safe_for_deep_input() {
566        let mut v = BorrowedValue::Int(0);
567        for _ in 0..100_000 {
568            v = BorrowedValue::Sequence(vec![v]);
569        }
570        let _ = calculate_borrowed_complexity(&v);
571        drop_borrowed_iteratively(v);
572    }
573
574    // Drops a deeply nested BorrowedValue without recursing through
575    // Vec/IndexMap's own Drop. Used only by the stack-safety tests above.
576    fn drop_borrowed_iteratively(root: BorrowedValue<'_>) {
577        let mut stack: Vec<BorrowedValue<'_>> = vec![root];
578        while let Some(node) = stack.pop() {
579            match node {
580                BorrowedValue::Sequence(seq) => stack.extend(seq),
581                BorrowedValue::Mapping(map) => {
582                    for (k, v) in map {
583                        stack.push(k);
584                        stack.push(v);
585                    }
586                }
587                _ => {}
588            }
589        }
590    }
591
592    // ---- alias materialization cap (#15) on ZeroCopyComposer ----
593
594    fn permissive_with_alias_cap(cap: usize) -> Limits {
595        Limits {
596            max_total_alias_nodes: cap,
597            ..Limits::permissive()
598        }
599    }
600
601    #[test]
602    fn zero_copy_alias_materialization_cap_fires() {
603        // Anchor expands to a 10-element sequence; 20 aliased siblings.
604        // Each *a materialization charges ~11 nodes, so cap=50 must reject.
605        let input = r#"
606a: &a [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
607b: [*a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a, *a]
608"#;
609        let mut composer = ZeroCopyComposer::with_limits(input, permissive_with_alias_cap(50));
610        let err = composer.compose_document().expect_err("cap should reject");
611        let msg = err.to_string();
612        assert!(
613            msg.contains("alias") || msg.contains("materializ") || msg.contains("limit"),
614            "expected materialization-cap error, got: {msg}"
615        );
616    }
617
618    #[test]
619    fn zero_copy_alias_below_cap_succeeds() {
620        // Same shape but a generous cap → must succeed.
621        let input = r#"
622a: &a [1, 2, 3]
623b: [*a, *a]
624"#;
625        let mut composer = ZeroCopyComposer::with_limits(input, permissive_with_alias_cap(10_000));
626        let result = composer.compose_document().unwrap().unwrap();
627        // sanity: returns a mapping
628        assert!(matches!(result, BorrowedValue::Mapping(_)));
629    }
630}