rust_yaml/
composer_borrowed.rs

1//! Zero-copy YAML composer for converting events to borrowed nodes
2//!
3//! This module provides a composer that minimizes allocations by using
4//! borrowed data structures where possible.
5
6use crate::{
7    parser::{EventType, ScalarStyle},
8    value_borrowed::BorrowedValue,
9    BasicParser, Error, Limits, Parser, Position, ResourceTracker, Result,
10};
11use indexmap::IndexMap;
12use std::collections::HashMap;
13
14/// Calculate the maximum nesting depth of a borrowed value structure
15fn calculate_borrowed_structure_depth(value: &BorrowedValue) -> usize {
16    match value {
17        BorrowedValue::Sequence(seq) => {
18            if seq.is_empty() {
19                1
20            } else {
21                1 + seq
22                    .iter()
23                    .map(calculate_borrowed_structure_depth)
24                    .max()
25                    .unwrap_or(0)
26            }
27        }
28        BorrowedValue::Mapping(map) => {
29            if map.is_empty() {
30                1
31            } else {
32                1 + map
33                    .values()
34                    .map(calculate_borrowed_structure_depth)
35                    .max()
36                    .unwrap_or(0)
37            }
38        }
39        _ => 1, // Scalars have depth 1
40    }
41}
42
43/// Trait for zero-copy YAML composers
44pub trait BorrowedComposer<'a> {
45    /// Check if there are more documents available
46    fn check_document(&self) -> bool;
47
48    /// Compose the next document with minimal allocations
49    fn compose_document(&mut self) -> Result<Option<BorrowedValue<'a>>>;
50
51    /// Get the current position in the stream
52    fn position(&self) -> Position;
53
54    /// Reset the composer state
55    fn reset(&mut self);
56}
57
58/// A zero-copy composer implementation
59pub struct ZeroCopyComposer<'a> {
60    parser: BasicParser,
61    position: Position,
62    /// Store anchors as borrowed values when possible
63    anchors: HashMap<&'a str, BorrowedValue<'a>>,
64    limits: Limits,
65    resource_tracker: ResourceTracker,
66    alias_expansion_stack: Vec<&'a str>,
67    current_depth: usize,
68    /// Reference to the input string for borrowing
69    input: &'a str,
70}
71
72impl<'a> ZeroCopyComposer<'a> {
73    /// Create a new zero-copy composer
74    pub fn new(input: &'a str) -> Self {
75        Self::with_limits(input, Limits::default())
76    }
77
78    /// Create a new zero-copy composer with custom limits
79    pub fn with_limits(input: &'a str, limits: Limits) -> Self {
80        Self {
81            parser: BasicParser::with_limits(input.to_string(), limits.clone()),
82            position: Position::new(),
83            anchors: HashMap::new(),
84            limits,
85            resource_tracker: ResourceTracker::new(),
86            alias_expansion_stack: Vec::new(),
87            current_depth: 0,
88            input,
89        }
90    }
91
92    /// Compose a node from events with minimal allocations
93    fn compose_node(&mut self) -> Result<Option<BorrowedValue<'a>>> {
94        if !self.parser.check_event() {
95            return Ok(None);
96        }
97
98        let Some(event) = self.parser.get_event()? else {
99            return Ok(None);
100        };
101
102        self.position = event.position;
103
104        match event.event_type {
105            EventType::StreamStart | EventType::StreamEnd => self.compose_node(),
106
107            EventType::DocumentStart { .. } => self.compose_node(),
108
109            EventType::DocumentEnd { .. } => Ok(None),
110
111            EventType::Scalar {
112                value,
113                anchor,
114                style,
115                ..
116            } => {
117                let scalar_value = self.compose_scalar_borrowed(&value, style)?;
118
119                // Store anchor if present - we need to clone here unfortunately
120                if let Some(anchor_name) = anchor {
121                    // We need to leak the string to get a 'static reference
122                    // In a real implementation, we'd use an arena allocator
123                    let anchor_str = Box::leak(anchor_name.into_boxed_str());
124                    self.anchors
125                        .insert(anchor_str, scalar_value.clone_if_needed());
126                }
127
128                Ok(Some(scalar_value))
129            }
130
131            EventType::SequenceStart { anchor, .. } => {
132                let sequence = self.compose_sequence()?;
133
134                // Store anchor if present
135                if let Some(anchor_name) = anchor {
136                    if let Some(ref seq) = sequence {
137                        let anchor_str = Box::leak(anchor_name.into_boxed_str());
138                        self.anchors.insert(anchor_str, seq.clone_if_needed());
139                    }
140                }
141
142                Ok(sequence)
143            }
144
145            EventType::MappingStart { anchor, .. } => {
146                let mapping = self.compose_mapping()?;
147
148                // Store anchor if present
149                if let Some(anchor_name) = anchor {
150                    if let Some(ref map) = mapping {
151                        let anchor_str = Box::leak(anchor_name.into_boxed_str());
152                        self.anchors.insert(anchor_str, map.clone_if_needed());
153                    }
154                }
155
156                Ok(mapping)
157            }
158
159            EventType::SequenceEnd | EventType::MappingEnd => Ok(None),
160
161            EventType::Alias { anchor } => {
162                // Check for cyclic references
163                let anchor_str = anchor.as_str();
164                if self.alias_expansion_stack.iter().any(|&a| a == anchor_str) {
165                    return Err(Error::construction(
166                        event.position,
167                        format!("Cyclic alias reference detected: '{}'", anchor_str),
168                    ));
169                }
170
171                // Check alias expansion depth limit BEFORE pushing
172                if self.alias_expansion_stack.len() >= self.limits.max_alias_depth {
173                    return Err(Error::construction(
174                        event.position,
175                        format!(
176                            "Maximum alias expansion depth {} exceeded",
177                            self.limits.max_alias_depth
178                        ),
179                    ));
180                }
181
182                // Track alias expansion
183                self.resource_tracker.enter_alias(&self.limits)?;
184
185                // Look up the anchor - try to avoid cloning if possible
186                let result = match self.anchors.get(anchor_str) {
187                    Some(value) => {
188                        // Check if the resolved value's structure depth would exceed alias depth limit
189                        let structure_depth = calculate_borrowed_structure_depth(value);
190                        if structure_depth > self.limits.max_alias_depth {
191                            return Err(Error::construction(
192                                event.position,
193                                format!(
194                                    "Alias '{}' creates structure with depth {} exceeding max_alias_depth {}",
195                                    anchor_str, structure_depth, self.limits.max_alias_depth
196                                ),
197                            ));
198                        }
199
200                        // Only clone if we absolutely need to
201                        Ok(Some(value.clone_if_needed()))
202                    }
203                    None => Err(Error::construction(
204                        event.position,
205                        format!("Unknown anchor '{}'", anchor_str),
206                    )),
207                };
208
209                self.resource_tracker.exit_alias();
210                result
211            }
212        }
213    }
214
215    /// Compose a scalar value with borrowing when possible
216    fn compose_scalar_borrowed(
217        &self,
218        value: &str,
219        style: ScalarStyle,
220    ) -> Result<BorrowedValue<'a>> {
221        // If explicitly quoted, always treat as string
222        match style {
223            ScalarStyle::SingleQuoted | ScalarStyle::DoubleQuoted => {
224                // For now, use owned strings to avoid unsafe code
225                // In a production implementation, we'd use an arena allocator
226                return Ok(BorrowedValue::owned_string(value.to_string()));
227            }
228            _ => {}
229        }
230
231        // Type resolution for unquoted scalars
232        if value.is_empty() {
233            return Ok(BorrowedValue::owned_string(value.to_string()));
234        }
235
236        // Try integer parsing
237        if let Ok(int_value) = value.parse::<i64>() {
238            return Ok(BorrowedValue::Int(int_value));
239        }
240
241        // Try float parsing
242        if let Ok(float_value) = value.parse::<f64>() {
243            return Ok(BorrowedValue::Float(float_value));
244        }
245
246        // Try boolean parsing
247        match value.to_lowercase().as_str() {
248            "true" | "yes" | "on" => return Ok(BorrowedValue::Bool(true)),
249            "false" | "no" | "off" => return Ok(BorrowedValue::Bool(false)),
250            "null" | "~" => return Ok(BorrowedValue::Null),
251            _ => {}
252        }
253
254        // Default to string - for now use owned to avoid unsafe
255
256        Ok(BorrowedValue::owned_string(value.to_string()))
257    }
258
259    /// Compose a sequence with minimal allocations
260    fn compose_sequence(&mut self) -> Result<Option<BorrowedValue<'a>>> {
261        self.current_depth += 1;
262        self.resource_tracker
263            .check_depth(&self.limits, self.current_depth)?;
264
265        let mut sequence = Vec::new();
266
267        while self.parser.check_event() {
268            if let Ok(Some(event)) = self.parser.peek_event() {
269                if matches!(event.event_type, EventType::SequenceEnd) {
270                    self.parser.get_event()?;
271                    break;
272                } else if matches!(
273                    event.event_type,
274                    EventType::DocumentEnd { .. }
275                        | EventType::DocumentStart { .. }
276                        | EventType::StreamEnd
277                ) {
278                    break;
279                }
280            }
281
282            if let Some(node) = self.compose_node()? {
283                self.resource_tracker.add_collection_item(&self.limits)?;
284                self.resource_tracker.add_complexity(&self.limits, 1)?;
285                sequence.push(node);
286            } else {
287                break;
288            }
289        }
290
291        self.current_depth -= 1;
292        Ok(Some(BorrowedValue::Sequence(sequence)))
293    }
294
295    /// Compose a mapping with minimal allocations
296    fn compose_mapping(&mut self) -> Result<Option<BorrowedValue<'a>>> {
297        self.current_depth += 1;
298        self.resource_tracker
299            .check_depth(&self.limits, self.current_depth)?;
300
301        let mut mapping = IndexMap::new();
302
303        while self.parser.check_event() {
304            if let Ok(Some(event)) = self.parser.peek_event() {
305                if matches!(event.event_type, EventType::MappingEnd) {
306                    self.parser.get_event()?;
307                    break;
308                } else if matches!(
309                    event.event_type,
310                    EventType::DocumentEnd { .. }
311                        | EventType::DocumentStart { .. }
312                        | EventType::StreamEnd
313                ) {
314                    break;
315                }
316            }
317
318            let Some(key) = self.compose_node()? else {
319                break;
320            };
321
322            let value = self.compose_node()?.unwrap_or(BorrowedValue::Null);
323
324            self.resource_tracker.add_collection_item(&self.limits)?;
325            self.resource_tracker.add_complexity(&self.limits, 2)?;
326
327            mapping.insert(key, value);
328        }
329
330        self.current_depth -= 1;
331        Ok(Some(BorrowedValue::Mapping(mapping)))
332    }
333}
334
335impl<'a> BorrowedComposer<'a> for ZeroCopyComposer<'a> {
336    fn check_document(&self) -> bool {
337        if let Ok(Some(event)) = self.parser.peek_event() {
338            !matches!(event.event_type, EventType::StreamEnd)
339        } else {
340            false
341        }
342    }
343
344    fn compose_document(&mut self) -> Result<Option<BorrowedValue<'a>>> {
345        if let Some(error) = self.parser.take_scanning_error() {
346            return Err(error);
347        }
348
349        // Skip any leading document start events
350        while let Ok(Some(event)) = self.parser.peek_event() {
351            if matches!(event.event_type, EventType::DocumentStart { .. }) {
352                self.parser.get_event()?;
353            } else {
354                break;
355            }
356        }
357
358        let document = self.compose_node()?;
359
360        // Skip any document end event
361        while let Ok(Some(event)) = self.parser.peek_event() {
362            if matches!(event.event_type, EventType::DocumentEnd { .. }) {
363                self.parser.get_event()?;
364            } else {
365                break;
366            }
367        }
368
369        Ok(document)
370    }
371
372    fn position(&self) -> Position {
373        self.position
374    }
375
376    fn reset(&mut self) {
377        self.position = Position::new();
378        self.anchors.clear();
379        self.resource_tracker.reset();
380        self.alias_expansion_stack.clear();
381        self.current_depth = 0;
382    }
383}
384
385#[cfg(test)]
386mod tests {
387    use super::*;
388
389    #[test]
390    fn test_zero_copy_scalar() {
391        let input = "hello world";
392        let mut composer = ZeroCopyComposer::new(input);
393        let result = composer.compose_document().unwrap().unwrap();
394
395        // Verify we got a string (currently owned due to implementation limitations)
396        if let BorrowedValue::String(cow) = result {
397            // Note: Currently returns owned strings due to implementation limitations
398            // TODO: Implement true zero-copy borrowing with arena allocator
399            assert!(matches!(cow, std::borrow::Cow::Owned(_)));
400            assert_eq!(cow.as_ref(), "hello world");
401        } else {
402            panic!("Expected string value");
403        }
404    }
405
406    #[test]
407    fn test_zero_copy_sequence() {
408        let input = "[1, 2, 3]";
409        let mut composer = ZeroCopyComposer::new(input);
410        let result = composer.compose_document().unwrap().unwrap();
411
412        if let BorrowedValue::Sequence(seq) = result {
413            assert_eq!(seq.len(), 3);
414            assert_eq!(seq[0], BorrowedValue::Int(1));
415            assert_eq!(seq[1], BorrowedValue::Int(2));
416            assert_eq!(seq[2], BorrowedValue::Int(3));
417        } else {
418            panic!("Expected sequence");
419        }
420    }
421
422    #[test]
423    fn test_zero_copy_mapping() {
424        let input = r#"{"key": "value"}"#;
425        let mut composer = ZeroCopyComposer::new(input);
426        let result = composer.compose_document().unwrap().unwrap();
427
428        if let BorrowedValue::Mapping(map) = result {
429            assert_eq!(map.len(), 1);
430            let key = BorrowedValue::owned_string("key".to_string());
431            assert!(map.contains_key(&key));
432        } else {
433            panic!("Expected mapping");
434        }
435    }
436}