Skip to main content

rust_yaml/
composer_borrowed.rs

1//! Zero-copy YAML composer for converting events to borrowed nodes
2//!
3//! This module provides a composer that minimizes allocations by using
4//! borrowed data structures where possible.
5
6use crate::{
7    BasicParser, Error, Limits, Parser, Position, ResourceTracker, Result,
8    parser::{EventType, ScalarStyle},
9    value_borrowed::BorrowedValue,
10};
11use indexmap::IndexMap;
12use std::collections::HashMap;
13
14/// Calculate the maximum nesting depth of a borrowed value structure
15fn calculate_borrowed_structure_depth(value: &BorrowedValue) -> usize {
16    match value {
17        BorrowedValue::Sequence(seq) => {
18            if seq.is_empty() {
19                1
20            } else {
21                1 + seq
22                    .iter()
23                    .map(calculate_borrowed_structure_depth)
24                    .max()
25                    .unwrap_or(0)
26            }
27        }
28        BorrowedValue::Mapping(map) => {
29            if map.is_empty() {
30                1
31            } else {
32                1 + map
33                    .values()
34                    .map(calculate_borrowed_structure_depth)
35                    .max()
36                    .unwrap_or(0)
37            }
38        }
39        _ => 1, // Scalars have depth 1
40    }
41}
42
43/// Trait for zero-copy YAML composers
44pub trait BorrowedComposer<'a> {
45    /// Check if there are more documents available
46    fn check_document(&self) -> bool;
47
48    /// Compose the next document with minimal allocations
49    fn compose_document(&mut self) -> Result<Option<BorrowedValue<'a>>>;
50
51    /// Get the current position in the stream
52    fn position(&self) -> Position;
53
54    /// Reset the composer state
55    fn reset(&mut self);
56}
57
58/// A zero-copy composer implementation
59pub struct ZeroCopyComposer<'a> {
60    parser: BasicParser,
61    position: Position,
62    /// Store anchors as borrowed values when possible
63    anchors: HashMap<&'a str, BorrowedValue<'a>>,
64    limits: Limits,
65    resource_tracker: ResourceTracker,
66    alias_expansion_stack: Vec<&'a str>,
67    current_depth: usize,
68    /// Reference to the input string for borrowing
69    input: &'a str,
70    /// Active YAML spec version for the current document.
71    yaml_version: crate::version::YamlVersion,
72}
73
74impl<'a> ZeroCopyComposer<'a> {
75    /// Create a new zero-copy composer
76    pub fn new(input: &'a str) -> Self {
77        Self::with_limits(input, Limits::default())
78    }
79
80    /// Create a new zero-copy composer with custom limits
81    pub fn with_limits(input: &'a str, limits: Limits) -> Self {
82        Self {
83            parser: BasicParser::with_limits(input.to_string(), limits.clone()),
84            position: Position::new(),
85            anchors: HashMap::new(),
86            limits,
87            resource_tracker: ResourceTracker::new(),
88            alias_expansion_stack: Vec::new(),
89            current_depth: 0,
90            input,
91            yaml_version: crate::version::YamlVersion::default(),
92        }
93    }
94
95    /// Compose a node from events with minimal allocations
96    fn compose_node(&mut self) -> Result<Option<BorrowedValue<'a>>> {
97        if !self.parser.check_event() {
98            return Ok(None);
99        }
100
101        let Some(event) = self.parser.get_event()? else {
102            return Ok(None);
103        };
104
105        self.position = event.position;
106
107        match event.event_type {
108            EventType::StreamStart | EventType::StreamEnd => self.compose_node(),
109
110            EventType::DocumentStart { .. } => self.compose_node(),
111
112            EventType::DocumentEnd { .. } => Ok(None),
113
114            EventType::Scalar {
115                value,
116                anchor,
117                style,
118                ..
119            } => {
120                let scalar_value = self.compose_scalar_borrowed(&value, style)?;
121
122                // Store anchor if present - we need to clone here unfortunately
123                if let Some(anchor_name) = anchor {
124                    // We need to leak the string to get a 'static reference
125                    // In a real implementation, we'd use an arena allocator
126                    let anchor_str = Box::leak(anchor_name.into_boxed_str());
127                    self.anchors
128                        .insert(anchor_str, scalar_value.clone_if_needed());
129                }
130
131                Ok(Some(scalar_value))
132            }
133
134            EventType::SequenceStart { anchor, .. } => {
135                let sequence = self.compose_sequence()?;
136
137                // Store anchor if present
138                if let Some(anchor_name) = anchor {
139                    if let Some(ref seq) = sequence {
140                        let anchor_str = Box::leak(anchor_name.into_boxed_str());
141                        self.anchors.insert(anchor_str, seq.clone_if_needed());
142                    }
143                }
144
145                Ok(sequence)
146            }
147
148            EventType::MappingStart { anchor, .. } => {
149                let mapping = self.compose_mapping()?;
150
151                // Store anchor if present
152                if let Some(anchor_name) = anchor {
153                    if let Some(ref map) = mapping {
154                        let anchor_str = Box::leak(anchor_name.into_boxed_str());
155                        self.anchors.insert(anchor_str, map.clone_if_needed());
156                    }
157                }
158
159                Ok(mapping)
160            }
161
162            EventType::SequenceEnd | EventType::MappingEnd => Ok(None),
163
164            EventType::Alias { anchor } => {
165                // Check for cyclic references
166                let anchor_str = anchor.as_str();
167                if self.alias_expansion_stack.iter().any(|&a| a == anchor_str) {
168                    return Err(Error::construction(
169                        event.position,
170                        format!("Cyclic alias reference detected: '{}'", anchor_str),
171                    ));
172                }
173
174                // Check alias expansion depth limit BEFORE pushing
175                if self.alias_expansion_stack.len() >= self.limits.max_alias_depth {
176                    return Err(Error::construction(
177                        event.position,
178                        format!(
179                            "Maximum alias expansion depth {} exceeded",
180                            self.limits.max_alias_depth
181                        ),
182                    ));
183                }
184
185                // Track alias expansion
186                self.resource_tracker.enter_alias(&self.limits)?;
187
188                // Look up the anchor - try to avoid cloning if possible
189                let result = match self.anchors.get(anchor_str) {
190                    Some(value) => {
191                        // Check if the resolved value's structure depth would exceed alias depth limit
192                        let structure_depth = calculate_borrowed_structure_depth(value);
193                        if structure_depth > self.limits.max_alias_depth {
194                            return Err(Error::construction(
195                                event.position,
196                                format!(
197                                    "Alias '{}' creates structure with depth {} exceeding max_alias_depth {}",
198                                    anchor_str, structure_depth, self.limits.max_alias_depth
199                                ),
200                            ));
201                        }
202
203                        // Only clone if we absolutely need to
204                        Ok(Some(value.clone_if_needed()))
205                    }
206                    None => Err(Error::construction(
207                        event.position,
208                        format!("Unknown anchor '{}'", anchor_str),
209                    )),
210                };
211
212                self.resource_tracker.exit_alias();
213                result
214            }
215        }
216    }
217
218    /// Compose a scalar value with borrowing when possible
219    fn compose_scalar_borrowed(
220        &self,
221        value: &str,
222        style: ScalarStyle,
223    ) -> Result<BorrowedValue<'a>> {
224        // Explicitly-quoted scalars are always strings.
225        if matches!(style, ScalarStyle::SingleQuoted | ScalarStyle::DoubleQuoted) {
226            return Ok(BorrowedValue::owned_string(value.to_string()));
227        }
228
229        Ok(
230            match crate::resolver::resolve_plain_scalar(value, self.yaml_version) {
231                crate::resolver::PlainScalarType::Null => BorrowedValue::Null,
232                crate::resolver::PlainScalarType::Bool(b) => BorrowedValue::Bool(b),
233                crate::resolver::PlainScalarType::Int(i) => BorrowedValue::Int(i),
234                crate::resolver::PlainScalarType::Float(f) => BorrowedValue::Float(f),
235                crate::resolver::PlainScalarType::Str => {
236                    BorrowedValue::owned_string(value.to_string())
237                }
238                crate::resolver::PlainScalarType::Value => {
239                    return Err(crate::resolver::value_tag_error(self.position));
240                }
241            },
242        )
243    }
244
245    /// Compose a sequence with minimal allocations
246    fn compose_sequence(&mut self) -> Result<Option<BorrowedValue<'a>>> {
247        self.current_depth += 1;
248        self.resource_tracker
249            .check_depth(&self.limits, self.current_depth)?;
250
251        let mut sequence = Vec::new();
252
253        while self.parser.check_event() {
254            if let Ok(Some(event)) = self.parser.peek_event() {
255                if matches!(event.event_type, EventType::SequenceEnd) {
256                    self.parser.get_event()?;
257                    break;
258                } else if matches!(
259                    event.event_type,
260                    EventType::DocumentEnd { .. }
261                        | EventType::DocumentStart { .. }
262                        | EventType::StreamEnd
263                ) {
264                    break;
265                }
266            }
267
268            if let Some(node) = self.compose_node()? {
269                self.resource_tracker.add_collection_item(&self.limits)?;
270                self.resource_tracker.add_complexity(&self.limits, 1)?;
271                sequence.push(node);
272            } else {
273                break;
274            }
275        }
276
277        self.current_depth -= 1;
278        Ok(Some(BorrowedValue::Sequence(sequence)))
279    }
280
281    /// Compose a mapping with minimal allocations
282    fn compose_mapping(&mut self) -> Result<Option<BorrowedValue<'a>>> {
283        self.current_depth += 1;
284        self.resource_tracker
285            .check_depth(&self.limits, self.current_depth)?;
286
287        let mut mapping = IndexMap::new();
288
289        while self.parser.check_event() {
290            if let Ok(Some(event)) = self.parser.peek_event() {
291                if matches!(event.event_type, EventType::MappingEnd) {
292                    self.parser.get_event()?;
293                    break;
294                } else if matches!(
295                    event.event_type,
296                    EventType::DocumentEnd { .. }
297                        | EventType::DocumentStart { .. }
298                        | EventType::StreamEnd
299                ) {
300                    break;
301                }
302            }
303
304            let Some(key) = self.compose_node()? else {
305                break;
306            };
307
308            let value = self.compose_node()?.unwrap_or(BorrowedValue::Null);
309
310            self.resource_tracker.add_collection_item(&self.limits)?;
311            self.resource_tracker.add_complexity(&self.limits, 2)?;
312
313            mapping.insert(key, value);
314        }
315
316        self.current_depth -= 1;
317        Ok(Some(BorrowedValue::Mapping(mapping)))
318    }
319}
320
321impl<'a> BorrowedComposer<'a> for ZeroCopyComposer<'a> {
322    fn check_document(&self) -> bool {
323        if let Ok(Some(event)) = self.parser.peek_event() {
324            !matches!(event.event_type, EventType::StreamEnd)
325        } else {
326            false
327        }
328    }
329
330    fn compose_document(&mut self) -> Result<Option<BorrowedValue<'a>>> {
331        if let Some(error) = self.parser.take_scanning_error() {
332            return Err(error);
333        }
334
335        // Consume document start events, capturing the YAML version directive.
336        while let Ok(Some(event)) = self.parser.peek_event() {
337            if let EventType::DocumentStart { version, .. } = &event.event_type {
338                self.yaml_version = version
339                    .map(|(maj, min)| crate::version::YamlVersion::from_directive(maj, min))
340                    .unwrap_or_default();
341                self.parser.get_event()?;
342            } else {
343                break;
344            }
345        }
346
347        let document = self.compose_node()?;
348
349        // Skip any document end event
350        while let Ok(Some(event)) = self.parser.peek_event() {
351            if matches!(event.event_type, EventType::DocumentEnd { .. }) {
352                self.parser.get_event()?;
353            } else {
354                break;
355            }
356        }
357
358        Ok(document)
359    }
360
361    fn position(&self) -> Position {
362        self.position
363    }
364
365    fn reset(&mut self) {
366        self.position = Position::new();
367        self.anchors.clear();
368        self.resource_tracker.reset();
369        self.alias_expansion_stack.clear();
370        self.current_depth = 0;
371    }
372}
373
#[cfg(test)]
mod tests {
    use super::*;

    /// Compose the first document of `input`, panicking if composition fails
    /// or yields no document.
    fn compose_first(input: &str) -> BorrowedValue<'_> {
        let mut composer = ZeroCopyComposer::new(input);
        composer.compose_document().unwrap().unwrap()
    }

    #[test]
    fn test_zero_copy_scalar() {
        let BorrowedValue::String(text) = compose_first("hello world") else {
            panic!("Expected string value");
        };

        // Strings are currently always owned; true zero-copy borrowing is
        // still a TODO (e.g. via an arena allocator).
        assert!(matches!(text, std::borrow::Cow::Owned(_)));
        assert_eq!(text.as_ref(), "hello world");
    }

    #[test]
    fn test_zero_copy_sequence() {
        let BorrowedValue::Sequence(items) = compose_first("[1, 2, 3]") else {
            panic!("Expected sequence");
        };

        assert_eq!(items.len(), 3);
        assert_eq!(items[0], BorrowedValue::Int(1));
        assert_eq!(items[1], BorrowedValue::Int(2));
        assert_eq!(items[2], BorrowedValue::Int(3));
    }

    #[test]
    fn test_zero_copy_mapping() {
        let BorrowedValue::Mapping(entries) = compose_first(r#"{"key": "value"}"#) else {
            panic!("Expected mapping");
        };

        assert_eq!(entries.len(), 1);
        let expected_key = BorrowedValue::owned_string("key".to_string());
        assert!(entries.contains_key(&expected_key));
    }
}
425}