Skip to main content

rust_yaml/
composer_comments.rs

1//! Comment-preserving YAML composer
2
3use crate::{
4    BasicParser, BasicScanner, CommentedValue, Comments, Error, Limits, Parser, Position,
5    ResourceTracker, Result, Scanner, Style, TokenType, Value,
6    composer::calculate_value_complexity, parser::EventType,
7};
8use indexmap::IndexMap;
9use std::collections::HashMap;
10
11/// A composer that preserves comments during parsing
12#[derive(Debug)]
13pub struct CommentPreservingComposer {
14    parser: BasicParser,
15    scanner: BasicScanner,
16    limits: Limits,
17    resource_tracker: ResourceTracker,
18    anchors: HashMap<String, CommentedValue>,
19    current_depth: usize,
20    alias_expansion_stack: Vec<String>,
21    /// Map of positions to comments (position -> comment text)
22    comment_map: HashMap<Position, String>,
23    /// Stack of pending comments that might belong to the next value
24    pending_comments: Vec<String>,
25    /// Active YAML spec version for the current document.
26    yaml_version: crate::version::YamlVersion,
27}
28
29impl CommentPreservingComposer {
30    /// Create a new comment-preserving composer
31    pub fn new(input: String) -> Self {
32        Self::with_limits(input, Limits::default())
33    }
34
35    /// Create a new comment-preserving composer with limits
36    pub fn with_limits(input: String, limits: Limits) -> Self {
37        // Use comment-preserving scanner
38        let scanner = BasicScanner::new_with_comments_and_limits(input.clone(), limits.clone());
39        let parser = BasicParser::new_eager_with_limits(input, limits.clone());
40
41        Self {
42            parser,
43            scanner,
44            limits,
45            resource_tracker: ResourceTracker::new(),
46            anchors: HashMap::new(),
47            current_depth: 0,
48            alias_expansion_stack: Vec::new(),
49            comment_map: HashMap::new(),
50            pending_comments: Vec::new(),
51            yaml_version: crate::version::YamlVersion::default(),
52        }
53    }
54
55    /// Extract comments from the scanner and build a position map
56    fn extract_comments(&mut self) -> Result<()> {
57        // Scan all tokens to extract comments
58        while self.scanner.check_token() {
59            if let Some(token) = self.scanner.get_token()? {
60                if let TokenType::Comment(comment_text) = token.token_type {
61                    // Store comment associated with its position
62                    self.comment_map
63                        .insert(token.start_position, comment_text.trim().to_string());
64                }
65            } else {
66                break;
67            }
68        }
69        Ok(())
70    }
71
72    /// Get comments that should be associated with a value at the given position
73    fn get_comments_for_position(&self, position: Position) -> Comments {
74        let mut comments = Comments::new();
75
76        // Enhanced comment correlation algorithm
77        for (comment_pos, comment_text) in &self.comment_map {
78            let line_diff = comment_pos.line as i32 - position.line as i32;
79
80            // Comments on the same line after the value (trailing)
81            if comment_pos.line == position.line && comment_pos.column > position.column {
82                comments.set_trailing(comment_text.clone());
83            }
84            // Comments on lines before the value (leading)
85            else if (-3..0).contains(&line_diff) {
86                // Allow up to 3 lines before as leading comments
87                comments.add_leading(comment_text.clone());
88            }
89            // Comments on the same line before the value (also leading)
90            else if comment_pos.line == position.line && comment_pos.column < position.column {
91                comments.add_leading(comment_text.clone());
92            }
93            // Comments immediately after (next line) could be inner comments
94            else if line_diff == 1 {
95                comments.add_inner(comment_text.clone());
96            }
97        }
98
99        comments
100    }
101
102    /// Compose a single document with comment preservation
103    pub fn compose_document(&mut self) -> Result<Option<CommentedValue>> {
104        // First, extract all comments from the scanner
105        self.extract_comments()?;
106
107        // Reset state
108        self.current_depth = 0;
109        self.anchors.clear();
110        self.alias_expansion_stack.clear();
111        self.resource_tracker.reset();
112
113        // Compose the document
114        self.compose_node()
115    }
116
117    /// Compose a single node (value) with comments
118    fn compose_node(&mut self) -> Result<Option<CommentedValue>> {
119        // Check resource limits
120        self.resource_tracker.add_complexity(&self.limits, 1)?;
121        self.current_depth += 1;
122
123        if self.current_depth > self.limits.max_depth {
124            return Err(Error::limit_exceeded(format!(
125                "Maximum nesting depth {} exceeded",
126                self.limits.max_depth
127            )));
128        }
129
130        // Get the next event from the parser
131        let event = match self.parser.get_event()? {
132            Some(event) => event,
133            None => {
134                self.current_depth -= 1;
135                return Ok(None);
136            }
137        };
138
139        let position = event.position;
140        let result = match event.event_type {
141            EventType::Scalar { value, anchor, .. } => self.compose_scalar(value, anchor, position),
142            EventType::SequenceStart { anchor, .. } => self.compose_sequence(anchor, position),
143            EventType::MappingStart { anchor, .. } => self.compose_mapping(anchor, position),
144            EventType::Alias { anchor } => self.compose_alias(anchor, position),
145            EventType::StreamStart | EventType::StreamEnd => {
146                // Skip structural events and try next
147                self.compose_node()
148            }
149            EventType::DocumentStart { version, .. } => {
150                // Capture the YAML version directive (if any) before recursing.
151                self.yaml_version = version
152                    .map(|(maj, min)| crate::version::YamlVersion::from_directive(maj, min))
153                    .unwrap_or_default();
154                self.compose_node()
155            }
156            EventType::DocumentEnd { .. } => {
157                // Skip document end and try next
158                self.compose_node()
159            }
160            EventType::SequenceEnd | EventType::MappingEnd => {
161                // These should be handled by their respective start handlers
162                // If we encounter them here, it means unbalanced structure
163                Ok(None)
164            }
165        };
166
167        self.current_depth -= 1;
168        result
169    }
170
171    /// Compose a scalar value
172    fn compose_scalar(
173        &mut self,
174        value: String,
175        anchor: Option<String>,
176        position: Position,
177    ) -> Result<Option<CommentedValue>> {
178        // Resolve the scalar type properly
179        let resolved_value = self.resolve_scalar_type(value, position)?;
180
181        let commented_value = CommentedValue {
182            value: resolved_value,
183            comments: self.get_comments_for_position(position),
184            style: Style::default(),
185        };
186
187        // Store anchor if present
188        if let Some(anchor_name) = anchor {
189            self.anchors.insert(anchor_name, commented_value.clone());
190        }
191
192        Ok(Some(commented_value))
193    }
194
195    /// Resolve scalar type from string value (version-aware).
196    ///
197    /// Returns `Err` when the YAML 1.1 `=` value tag is detected — the
198    /// `CommentedValue` tree has no tagged-scalar representation, so we
199    /// surface the same error as the other composers (matches
200    /// `ruamel.yaml typ="safe"`).
201    fn resolve_scalar_type(&self, value: String, position: Position) -> Result<Value> {
202        Ok(
203            match crate::resolver::resolve_plain_scalar(&value, self.yaml_version) {
204                crate::resolver::PlainScalarType::Null => Value::Null,
205                crate::resolver::PlainScalarType::Bool(b) => Value::Bool(b),
206                crate::resolver::PlainScalarType::Int(i) => Value::Int(i),
207                crate::resolver::PlainScalarType::Float(f) => Value::Float(f),
208                crate::resolver::PlainScalarType::Str => Value::String(value),
209                crate::resolver::PlainScalarType::Value => {
210                    return Err(crate::resolver::value_tag_error(position));
211                }
212            },
213        )
214    }
215
216    /// Compose a sequence
217    fn compose_sequence(
218        &mut self,
219        anchor: Option<String>,
220        position: Position,
221    ) -> Result<Option<CommentedValue>> {
222        let mut sequence = Vec::new();
223        let mut inner_comments = Vec::new();
224
225        // Collect sequence items
226        while let Some(item_event) = self.parser.peek_event()? {
227            if matches!(item_event.event_type, EventType::SequenceEnd) {
228                self.parser.get_event()?; // consume SequenceEnd
229                break;
230            }
231
232            if let Some(item) = self.compose_node()? {
233                self.collect_item_comments(&item, &mut inner_comments);
234                sequence.push(item.value);
235            }
236        }
237
238        let mut comments = self.get_comments_for_position(position);
239        comments.inner = inner_comments;
240
241        let commented_value = CommentedValue {
242            value: Value::Sequence(sequence),
243            comments,
244            style: Style::default(),
245        };
246
247        // Store anchor if present
248        if let Some(anchor_name) = anchor {
249            self.anchors.insert(anchor_name, commented_value.clone());
250        }
251
252        Ok(Some(commented_value))
253    }
254
255    /// Compose a mapping
256    fn compose_mapping(
257        &mut self,
258        anchor: Option<String>,
259        position: Position,
260    ) -> Result<Option<CommentedValue>> {
261        let mut mapping = IndexMap::new();
262        let mut inner_comments = Vec::new();
263
264        // Collect mapping items
265        while let Some(event) = self.parser.peek_event()? {
266            if matches!(event.event_type, EventType::MappingEnd) {
267                self.parser.get_event()?; // consume MappingEnd
268                break;
269            }
270
271            // Get key
272            let (key, key_comments) = match self.compose_node()? {
273                Some(key_commented) => (key_commented.value, key_commented.comments),
274                None => break,
275            };
276
277            // Get value
278            let (value, value_comments) = match self.compose_node()? {
279                Some(value_commented) => (value_commented.value, value_commented.comments),
280                None => (Value::Null, Comments::new()),
281            };
282
283            // Collect comments from key-value pairs
284            self.collect_comments(&key_comments, &mut inner_comments);
285            self.collect_comments(&value_comments, &mut inner_comments);
286
287            // Handle merge keys
288            if let Value::String(key_str) = &key {
289                if key_str == "<<" {
290                    self.process_merge_key(&mut mapping, &value)?;
291                    continue;
292                }
293            }
294
295            mapping.insert(key, value);
296        }
297
298        let mut comments = self.get_comments_for_position(position);
299        comments.inner.extend(inner_comments);
300
301        let commented_value = CommentedValue {
302            value: Value::Mapping(mapping),
303            comments,
304            style: Style::default(),
305        };
306
307        // Store anchor if present
308        if let Some(anchor_name) = anchor {
309            self.anchors.insert(anchor_name, commented_value.clone());
310        }
311
312        Ok(Some(commented_value))
313    }
314
315    /// Compose an alias reference
316    fn compose_alias(
317        &mut self,
318        anchor: String,
319        position: Position,
320    ) -> Result<Option<CommentedValue>> {
321        // Prevent cyclic references
322        if self.alias_expansion_stack.contains(&anchor) {
323            return Err(Error::parse(
324                position,
325                format!("Cyclic alias reference detected: '{}'", anchor),
326            ));
327        }
328
329        // Cap alias expansion depth (parity with BasicComposer).
330        if self.alias_expansion_stack.len() >= self.limits.max_alias_depth {
331            return Err(Error::parse(
332                position,
333                format!(
334                    "Maximum alias expansion depth {} exceeded",
335                    self.limits.max_alias_depth
336                ),
337            ));
338        }
339
340        self.alias_expansion_stack.push(anchor.clone());
341
342        let result = match self.anchors.get(&anchor) {
343            Some(value) => {
344                // Cap cumulative alias materialization BEFORE the clone —
345                // closes the billion-laughs gap on the comment-preserving
346                // load path (#15).
347                let nodes = calculate_value_complexity(&value.value)?;
348                self.resource_tracker
349                    .add_alias_materialization(&self.limits, nodes)?;
350                self.resource_tracker.add_complexity(&self.limits, nodes)?;
351                Ok(Some(value.clone()))
352            }
353            None => Err(Error::parse(
354                position,
355                format!("Unknown anchor '{}'", anchor),
356            )),
357        };
358
359        self.alias_expansion_stack.pop();
360        result
361    }
362
363    /// Collect comments from a commented value's comments into inner comments
364    fn collect_item_comments(&self, item: &CommentedValue, inner_comments: &mut Vec<String>) {
365        if item.has_comments() {
366            for leading in &item.comments.leading {
367                inner_comments.push(leading.clone());
368            }
369            if let Some(ref trailing) = item.comments.trailing {
370                inner_comments.push(trailing.clone());
371            }
372        }
373    }
374
375    /// Collect comments from a Comments struct into inner comments
376    fn collect_comments(&self, comments: &Comments, inner_comments: &mut Vec<String>) {
377        if !comments.leading.is_empty() || comments.trailing.is_some() {
378            for leading in &comments.leading {
379                inner_comments.push(leading.clone());
380            }
381            if let Some(ref trailing) = comments.trailing {
382                inner_comments.push(trailing.clone());
383            }
384        }
385    }
386
387    /// Process a merge key by merging values into the current mapping
388    fn process_merge_key(
389        &self,
390        mapping: &mut IndexMap<Value, Value>,
391        merge_value: &Value,
392    ) -> Result<()> {
393        match merge_value {
394            Value::Mapping(source_map) => {
395                for (key, value) in source_map {
396                    mapping.entry(key.clone()).or_insert_with(|| value.clone());
397                }
398            }
399            Value::Sequence(sources) => {
400                for source in sources {
401                    if let Value::Mapping(source_map) = source {
402                        for (key, value) in source_map {
403                            mapping.entry(key.clone()).or_insert_with(|| value.clone());
404                        }
405                    }
406                }
407            }
408            _ => {
409                // Invalid merge value - ignore
410            }
411        }
412        Ok(())
413    }
414}
415
#[cfg(test)]
mod tests {
    use super::*;

    /// Composing a commented document must still yield the expected mapping.
    ///
    /// The structural result is asserted; the attached comments are only
    /// printed, because their exact text and placement depend on scanner
    /// details and on the position-based correlation heuristic.
    #[test]
    fn test_comment_preservation() {
        let yaml = r#"
# Leading comment
key: value  # Trailing comment
# Another comment
nested:
  # Nested comment
  item: data
"#;

        let mut composer = CommentPreservingComposer::new(yaml.to_string());
        let commented_value = composer
            .compose_document()
            .expect("a commented document should compose without error")
            .expect("the document should contain a root node");

        let mapping = match &commented_value.value {
            Value::Mapping(mapping) => mapping,
            _ => panic!("root node should be a mapping"),
        };

        // Comments must not leak into, or drop entries from, the composed value.
        assert!(
            mapping.get(&Value::String("key".to_string()))
                == Some(&Value::String("value".to_string()))
        );
        assert!(mapping.contains_key(&Value::String("nested".to_string())));

        // Should have preserved some comments
        println!("Preserved comments: {:?}", commented_value.comments);
    }
}