Skip to main content

rust_yaml/
composer_comments.rs

1//! Comment-preserving YAML composer
2
3use crate::{
4    BasicParser, BasicScanner, CommentedValue, Comments, Error, Limits, Parser, Position,
5    ResourceTracker, Result, Scanner, Style, TokenType, Value, parser::EventType,
6};
7use indexmap::IndexMap;
8use std::collections::HashMap;
9
10/// A composer that preserves comments during parsing
11#[derive(Debug)]
12pub struct CommentPreservingComposer {
13    parser: BasicParser,
14    scanner: BasicScanner,
15    limits: Limits,
16    resource_tracker: ResourceTracker,
17    anchors: HashMap<String, CommentedValue>,
18    current_depth: usize,
19    alias_expansion_stack: Vec<String>,
20    /// Map of positions to comments (position -> comment text)
21    comment_map: HashMap<Position, String>,
22    /// Stack of pending comments that might belong to the next value
23    pending_comments: Vec<String>,
24    /// Active YAML spec version for the current document.
25    yaml_version: crate::version::YamlVersion,
26}
27
28impl CommentPreservingComposer {
29    /// Create a new comment-preserving composer
30    pub fn new(input: String) -> Self {
31        Self::with_limits(input, Limits::default())
32    }
33
34    /// Create a new comment-preserving composer with limits
35    pub fn with_limits(input: String, limits: Limits) -> Self {
36        // Use comment-preserving scanner
37        let scanner = BasicScanner::new_with_comments_and_limits(input.clone(), limits.clone());
38        let parser = BasicParser::new_eager_with_limits(input, limits.clone());
39
40        Self {
41            parser,
42            scanner,
43            limits,
44            resource_tracker: ResourceTracker::new(),
45            anchors: HashMap::new(),
46            current_depth: 0,
47            alias_expansion_stack: Vec::new(),
48            comment_map: HashMap::new(),
49            pending_comments: Vec::new(),
50            yaml_version: crate::version::YamlVersion::default(),
51        }
52    }
53
54    /// Extract comments from the scanner and build a position map
55    fn extract_comments(&mut self) -> Result<()> {
56        // Scan all tokens to extract comments
57        while self.scanner.check_token() {
58            if let Some(token) = self.scanner.get_token()? {
59                if let TokenType::Comment(comment_text) = token.token_type {
60                    // Store comment associated with its position
61                    self.comment_map
62                        .insert(token.start_position, comment_text.trim().to_string());
63                }
64            } else {
65                break;
66            }
67        }
68        Ok(())
69    }
70
71    /// Get comments that should be associated with a value at the given position
72    fn get_comments_for_position(&self, position: Position) -> Comments {
73        let mut comments = Comments::new();
74
75        // Enhanced comment correlation algorithm
76        for (comment_pos, comment_text) in &self.comment_map {
77            let line_diff = comment_pos.line as i32 - position.line as i32;
78
79            // Comments on the same line after the value (trailing)
80            if comment_pos.line == position.line && comment_pos.column > position.column {
81                comments.set_trailing(comment_text.clone());
82            }
83            // Comments on lines before the value (leading)
84            else if (-3..0).contains(&line_diff) {
85                // Allow up to 3 lines before as leading comments
86                comments.add_leading(comment_text.clone());
87            }
88            // Comments on the same line before the value (also leading)
89            else if comment_pos.line == position.line && comment_pos.column < position.column {
90                comments.add_leading(comment_text.clone());
91            }
92            // Comments immediately after (next line) could be inner comments
93            else if line_diff == 1 {
94                comments.add_inner(comment_text.clone());
95            }
96        }
97
98        comments
99    }
100
101    /// Compose a single document with comment preservation
102    pub fn compose_document(&mut self) -> Result<Option<CommentedValue>> {
103        // First, extract all comments from the scanner
104        self.extract_comments()?;
105
106        // Reset state
107        self.current_depth = 0;
108        self.anchors.clear();
109        self.alias_expansion_stack.clear();
110        self.resource_tracker.reset();
111
112        // Compose the document
113        self.compose_node()
114    }
115
116    /// Compose a single node (value) with comments
117    fn compose_node(&mut self) -> Result<Option<CommentedValue>> {
118        // Check resource limits
119        self.resource_tracker.add_complexity(&self.limits, 1)?;
120        self.current_depth += 1;
121
122        if self.current_depth > self.limits.max_depth {
123            return Err(Error::limit_exceeded(format!(
124                "Maximum nesting depth {} exceeded",
125                self.limits.max_depth
126            )));
127        }
128
129        // Get the next event from the parser
130        let event = match self.parser.get_event()? {
131            Some(event) => event,
132            None => {
133                self.current_depth -= 1;
134                return Ok(None);
135            }
136        };
137
138        let position = event.position;
139        let result = match event.event_type {
140            EventType::Scalar { value, anchor, .. } => self.compose_scalar(value, anchor, position),
141            EventType::SequenceStart { anchor, .. } => self.compose_sequence(anchor, position),
142            EventType::MappingStart { anchor, .. } => self.compose_mapping(anchor, position),
143            EventType::Alias { anchor } => self.compose_alias(anchor, position),
144            EventType::StreamStart | EventType::StreamEnd => {
145                // Skip structural events and try next
146                self.compose_node()
147            }
148            EventType::DocumentStart { version, .. } => {
149                // Capture the YAML version directive (if any) before recursing.
150                self.yaml_version = version
151                    .map(|(maj, min)| crate::version::YamlVersion::from_directive(maj, min))
152                    .unwrap_or_default();
153                self.compose_node()
154            }
155            EventType::DocumentEnd { .. } => {
156                // Skip document end and try next
157                self.compose_node()
158            }
159            EventType::SequenceEnd | EventType::MappingEnd => {
160                // These should be handled by their respective start handlers
161                // If we encounter them here, it means unbalanced structure
162                Ok(None)
163            }
164        };
165
166        self.current_depth -= 1;
167        result
168    }
169
170    /// Compose a scalar value
171    fn compose_scalar(
172        &mut self,
173        value: String,
174        anchor: Option<String>,
175        position: Position,
176    ) -> Result<Option<CommentedValue>> {
177        // Resolve the scalar type properly
178        let resolved_value = self.resolve_scalar_type(value, position)?;
179
180        let commented_value = CommentedValue {
181            value: resolved_value,
182            comments: self.get_comments_for_position(position),
183            style: Style::default(),
184        };
185
186        // Store anchor if present
187        if let Some(anchor_name) = anchor {
188            self.anchors.insert(anchor_name, commented_value.clone());
189        }
190
191        Ok(Some(commented_value))
192    }
193
194    /// Resolve scalar type from string value (version-aware).
195    ///
196    /// Returns `Err` when the YAML 1.1 `=` value tag is detected — the
197    /// `CommentedValue` tree has no tagged-scalar representation, so we
198    /// surface the same error as the other composers (matches
199    /// `ruamel.yaml typ="safe"`).
200    fn resolve_scalar_type(&self, value: String, position: Position) -> Result<Value> {
201        Ok(
202            match crate::resolver::resolve_plain_scalar(&value, self.yaml_version) {
203                crate::resolver::PlainScalarType::Null => Value::Null,
204                crate::resolver::PlainScalarType::Bool(b) => Value::Bool(b),
205                crate::resolver::PlainScalarType::Int(i) => Value::Int(i),
206                crate::resolver::PlainScalarType::Float(f) => Value::Float(f),
207                crate::resolver::PlainScalarType::Str => Value::String(value),
208                crate::resolver::PlainScalarType::Value => {
209                    return Err(crate::resolver::value_tag_error(position));
210                }
211            },
212        )
213    }
214
215    /// Compose a sequence
216    fn compose_sequence(
217        &mut self,
218        anchor: Option<String>,
219        position: Position,
220    ) -> Result<Option<CommentedValue>> {
221        let mut sequence = Vec::new();
222        let mut inner_comments = Vec::new();
223
224        // Collect sequence items
225        while let Some(item_event) = self.parser.peek_event()? {
226            if matches!(item_event.event_type, EventType::SequenceEnd) {
227                self.parser.get_event()?; // consume SequenceEnd
228                break;
229            }
230
231            if let Some(item) = self.compose_node()? {
232                self.collect_item_comments(&item, &mut inner_comments);
233                sequence.push(item.value);
234            }
235        }
236
237        let mut comments = self.get_comments_for_position(position);
238        comments.inner = inner_comments;
239
240        let commented_value = CommentedValue {
241            value: Value::Sequence(sequence),
242            comments,
243            style: Style::default(),
244        };
245
246        // Store anchor if present
247        if let Some(anchor_name) = anchor {
248            self.anchors.insert(anchor_name, commented_value.clone());
249        }
250
251        Ok(Some(commented_value))
252    }
253
254    /// Compose a mapping
255    fn compose_mapping(
256        &mut self,
257        anchor: Option<String>,
258        position: Position,
259    ) -> Result<Option<CommentedValue>> {
260        let mut mapping = IndexMap::new();
261        let mut inner_comments = Vec::new();
262
263        // Collect mapping items
264        while let Some(event) = self.parser.peek_event()? {
265            if matches!(event.event_type, EventType::MappingEnd) {
266                self.parser.get_event()?; // consume MappingEnd
267                break;
268            }
269
270            // Get key
271            let (key, key_comments) = match self.compose_node()? {
272                Some(key_commented) => (key_commented.value, key_commented.comments),
273                None => break,
274            };
275
276            // Get value
277            let (value, value_comments) = match self.compose_node()? {
278                Some(value_commented) => (value_commented.value, value_commented.comments),
279                None => (Value::Null, Comments::new()),
280            };
281
282            // Collect comments from key-value pairs
283            self.collect_comments(&key_comments, &mut inner_comments);
284            self.collect_comments(&value_comments, &mut inner_comments);
285
286            // Handle merge keys
287            if let Value::String(key_str) = &key {
288                if key_str == "<<" {
289                    self.process_merge_key(&mut mapping, &value)?;
290                    continue;
291                }
292            }
293
294            mapping.insert(key, value);
295        }
296
297        let mut comments = self.get_comments_for_position(position);
298        comments.inner.extend(inner_comments);
299
300        let commented_value = CommentedValue {
301            value: Value::Mapping(mapping),
302            comments,
303            style: Style::default(),
304        };
305
306        // Store anchor if present
307        if let Some(anchor_name) = anchor {
308            self.anchors.insert(anchor_name, commented_value.clone());
309        }
310
311        Ok(Some(commented_value))
312    }
313
314    /// Compose an alias reference
315    fn compose_alias(
316        &mut self,
317        anchor: String,
318        position: Position,
319    ) -> Result<Option<CommentedValue>> {
320        // Prevent cyclic references
321        if self.alias_expansion_stack.contains(&anchor) {
322            return Err(Error::parse(
323                position,
324                format!("Cyclic alias reference detected: '{}'", anchor),
325            ));
326        }
327
328        self.alias_expansion_stack.push(anchor.clone());
329
330        let result = match self.anchors.get(&anchor) {
331            Some(value) => Ok(Some(value.clone())),
332            None => Err(Error::parse(
333                position,
334                format!("Unknown anchor '{}'", anchor),
335            )),
336        };
337
338        self.alias_expansion_stack.pop();
339        result
340    }
341
342    /// Collect comments from a commented value's comments into inner comments
343    fn collect_item_comments(&self, item: &CommentedValue, inner_comments: &mut Vec<String>) {
344        if item.has_comments() {
345            for leading in &item.comments.leading {
346                inner_comments.push(leading.clone());
347            }
348            if let Some(ref trailing) = item.comments.trailing {
349                inner_comments.push(trailing.clone());
350            }
351        }
352    }
353
354    /// Collect comments from a Comments struct into inner comments
355    fn collect_comments(&self, comments: &Comments, inner_comments: &mut Vec<String>) {
356        if !comments.leading.is_empty() || comments.trailing.is_some() {
357            for leading in &comments.leading {
358                inner_comments.push(leading.clone());
359            }
360            if let Some(ref trailing) = comments.trailing {
361                inner_comments.push(trailing.clone());
362            }
363        }
364    }
365
366    /// Process a merge key by merging values into the current mapping
367    fn process_merge_key(
368        &self,
369        mapping: &mut IndexMap<Value, Value>,
370        merge_value: &Value,
371    ) -> Result<()> {
372        match merge_value {
373            Value::Mapping(source_map) => {
374                for (key, value) in source_map {
375                    mapping.entry(key.clone()).or_insert_with(|| value.clone());
376                }
377            }
378            Value::Sequence(sources) => {
379                for source in sources {
380                    if let Value::Mapping(source_map) = source {
381                        for (key, value) in source_map {
382                            mapping.entry(key.clone()).or_insert_with(|| value.clone());
383                        }
384                    }
385                }
386            }
387            _ => {
388                // Invalid merge value - ignore
389            }
390        }
391        Ok(())
392    }
393}
394
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: a document containing leading, trailing and nested comments
    /// composes successfully and yields a root node.
    #[test]
    fn test_comment_preservation() {
        let yaml = r#"
# Leading comment
key: value  # Trailing comment
# Another comment
nested:
  # Nested comment
  item: data
"#;

        let mut composer = CommentPreservingComposer::new(String::from(yaml));
        let document = composer.compose_document().unwrap();

        assert!(document.is_some());
        let root = document.unwrap();

        // Printed for manual inspection; no comment content is asserted.
        println!("Preserved comments: {:?}", root.comments);
    }
}
419}