// rust_yaml/composer_comments.rs

1//! Comment-preserving YAML composer
2
3use crate::{
4    parser::EventType, BasicParser, BasicScanner, CommentedValue, Comments, Error, Limits, Parser,
5    Position, ResourceTracker, Result, Scanner, Style, TokenType, Value,
6};
7use indexmap::IndexMap;
8use std::collections::HashMap;
9
10/// A composer that preserves comments during parsing
11#[derive(Debug)]
12pub struct CommentPreservingComposer {
13    parser: BasicParser,
14    scanner: BasicScanner,
15    limits: Limits,
16    resource_tracker: ResourceTracker,
17    anchors: HashMap<String, CommentedValue>,
18    current_depth: usize,
19    alias_expansion_stack: Vec<String>,
20    /// Map of positions to comments (position -> comment text)
21    comment_map: HashMap<Position, String>,
22    /// Stack of pending comments that might belong to the next value
23    pending_comments: Vec<String>,
24}
25
26impl CommentPreservingComposer {
27    /// Create a new comment-preserving composer
28    pub fn new(input: String) -> Self {
29        Self::with_limits(input, Limits::default())
30    }
31
32    /// Create a new comment-preserving composer with limits
33    pub fn with_limits(input: String, limits: Limits) -> Self {
34        // Use comment-preserving scanner
35        let scanner = BasicScanner::new_with_comments_and_limits(input.clone(), limits.clone());
36        let parser = BasicParser::new_eager_with_limits(input, limits.clone());
37
38        Self {
39            parser,
40            scanner,
41            limits,
42            resource_tracker: ResourceTracker::new(),
43            anchors: HashMap::new(),
44            current_depth: 0,
45            alias_expansion_stack: Vec::new(),
46            comment_map: HashMap::new(),
47            pending_comments: Vec::new(),
48        }
49    }
50
51    /// Extract comments from the scanner and build a position map
52    fn extract_comments(&mut self) -> Result<()> {
53        // Scan all tokens to extract comments
54        while self.scanner.check_token() {
55            if let Some(token) = self.scanner.get_token()? {
56                if let TokenType::Comment(comment_text) = token.token_type {
57                    // Store comment associated with its position
58                    self.comment_map
59                        .insert(token.start_position, comment_text.trim().to_string());
60                }
61            } else {
62                break;
63            }
64        }
65        Ok(())
66    }
67
68    /// Get comments that should be associated with a value at the given position
69    fn get_comments_for_position(&self, position: Position) -> Comments {
70        let mut comments = Comments::new();
71
72        // Enhanced comment correlation algorithm
73        for (comment_pos, comment_text) in &self.comment_map {
74            let line_diff = comment_pos.line as i32 - position.line as i32;
75
76            // Comments on the same line after the value (trailing)
77            if comment_pos.line == position.line && comment_pos.column > position.column {
78                comments.set_trailing(comment_text.clone());
79            }
80            // Comments on lines before the value (leading)
81            else if (-3..0).contains(&line_diff) {
82                // Allow up to 3 lines before as leading comments
83                comments.add_leading(comment_text.clone());
84            }
85            // Comments on the same line before the value (also leading)
86            else if comment_pos.line == position.line && comment_pos.column < position.column {
87                comments.add_leading(comment_text.clone());
88            }
89            // Comments immediately after (next line) could be inner comments
90            else if line_diff == 1 {
91                comments.add_inner(comment_text.clone());
92            }
93        }
94
95        comments
96    }
97
98    /// Compose a single document with comment preservation
99    pub fn compose_document(&mut self) -> Result<Option<CommentedValue>> {
100        // First, extract all comments from the scanner
101        self.extract_comments()?;
102
103        // Reset state
104        self.current_depth = 0;
105        self.anchors.clear();
106        self.alias_expansion_stack.clear();
107        self.resource_tracker.reset();
108
109        // Compose the document
110        self.compose_node()
111    }
112
113    /// Compose a single node (value) with comments
114    fn compose_node(&mut self) -> Result<Option<CommentedValue>> {
115        // Check resource limits
116        self.resource_tracker.add_complexity(&self.limits, 1)?;
117        self.current_depth += 1;
118
119        if self.current_depth > self.limits.max_depth {
120            return Err(Error::limit_exceeded(format!(
121                "Maximum nesting depth {} exceeded",
122                self.limits.max_depth
123            )));
124        }
125
126        // Get the next event from the parser
127        let event = match self.parser.get_event()? {
128            Some(event) => event,
129            None => {
130                self.current_depth -= 1;
131                return Ok(None);
132            }
133        };
134
135        let position = event.position;
136        let result = match event.event_type {
137            EventType::Scalar { value, anchor, .. } => self.compose_scalar(value, anchor, position),
138            EventType::SequenceStart { anchor, .. } => self.compose_sequence(anchor, position),
139            EventType::MappingStart { anchor, .. } => self.compose_mapping(anchor, position),
140            EventType::Alias { anchor } => self.compose_alias(anchor, position),
141            EventType::StreamStart | EventType::StreamEnd => {
142                // Skip structural events and try next
143                self.compose_node()
144            }
145            EventType::DocumentStart { .. } | EventType::DocumentEnd { .. } => {
146                // Skip document markers and try next
147                self.compose_node()
148            }
149            EventType::SequenceEnd | EventType::MappingEnd => {
150                // These should be handled by their respective start handlers
151                // If we encounter them here, it means unbalanced structure
152                Ok(None)
153            }
154        };
155
156        self.current_depth -= 1;
157        result
158    }
159
160    /// Compose a scalar value
161    fn compose_scalar(
162        &mut self,
163        value: String,
164        anchor: Option<String>,
165        position: Position,
166    ) -> Result<Option<CommentedValue>> {
167        // Resolve the scalar type properly
168        let resolved_value = self.resolve_scalar_type(value);
169
170        let commented_value = CommentedValue {
171            value: resolved_value,
172            comments: self.get_comments_for_position(position),
173            style: Style::default(),
174        };
175
176        // Store anchor if present
177        if let Some(anchor_name) = anchor {
178            self.anchors.insert(anchor_name, commented_value.clone());
179        }
180
181        Ok(Some(commented_value))
182    }
183
184    /// Resolve scalar type from string value
185    fn resolve_scalar_type(&self, value: String) -> Value {
186        // Empty string
187        if value.is_empty() {
188            return Value::String(value);
189        }
190
191        // Try integer parsing
192        if let Ok(int_value) = value.parse::<i64>() {
193            return Value::Int(int_value);
194        }
195
196        // Try float parsing
197        if let Ok(float_value) = value.parse::<f64>() {
198            return Value::Float(float_value);
199        }
200
201        // Try boolean parsing
202        match value.to_lowercase().as_str() {
203            "true" | "yes" | "on" => return Value::Bool(true),
204            "false" | "no" | "off" => return Value::Bool(false),
205            "null" | "~" => return Value::Null,
206            _ => {}
207        }
208
209        // Default to string
210        Value::String(value)
211    }
212
213    /// Compose a sequence
214    fn compose_sequence(
215        &mut self,
216        anchor: Option<String>,
217        position: Position,
218    ) -> Result<Option<CommentedValue>> {
219        let mut sequence = Vec::new();
220        let mut inner_comments = Vec::new();
221
222        // Collect sequence items
223        while let Some(item_event) = self.parser.peek_event()? {
224            if matches!(item_event.event_type, EventType::SequenceEnd) {
225                self.parser.get_event()?; // consume SequenceEnd
226                break;
227            }
228
229            if let Some(item) = self.compose_node()? {
230                self.collect_item_comments(&item, &mut inner_comments);
231                sequence.push(item.value);
232            }
233        }
234
235        let mut comments = self.get_comments_for_position(position);
236        comments.inner = inner_comments;
237
238        let commented_value = CommentedValue {
239            value: Value::Sequence(sequence),
240            comments,
241            style: Style::default(),
242        };
243
244        // Store anchor if present
245        if let Some(anchor_name) = anchor {
246            self.anchors.insert(anchor_name, commented_value.clone());
247        }
248
249        Ok(Some(commented_value))
250    }
251
252    /// Compose a mapping
253    fn compose_mapping(
254        &mut self,
255        anchor: Option<String>,
256        position: Position,
257    ) -> Result<Option<CommentedValue>> {
258        let mut mapping = IndexMap::new();
259        let mut inner_comments = Vec::new();
260
261        // Collect mapping items
262        while let Some(event) = self.parser.peek_event()? {
263            if matches!(event.event_type, EventType::MappingEnd) {
264                self.parser.get_event()?; // consume MappingEnd
265                break;
266            }
267
268            // Get key
269            let (key, key_comments) = match self.compose_node()? {
270                Some(key_commented) => (key_commented.value, key_commented.comments),
271                None => break,
272            };
273
274            // Get value
275            let (value, value_comments) = match self.compose_node()? {
276                Some(value_commented) => (value_commented.value, value_commented.comments),
277                None => (Value::Null, Comments::new()),
278            };
279
280            // Collect comments from key-value pairs
281            self.collect_comments(&key_comments, &mut inner_comments);
282            self.collect_comments(&value_comments, &mut inner_comments);
283
284            // Handle merge keys
285            if let Value::String(key_str) = &key {
286                if key_str == "<<" {
287                    self.process_merge_key(&mut mapping, &value)?;
288                    continue;
289                }
290            }
291
292            mapping.insert(key, value);
293        }
294
295        let mut comments = self.get_comments_for_position(position);
296        comments.inner.extend(inner_comments);
297
298        let commented_value = CommentedValue {
299            value: Value::Mapping(mapping),
300            comments,
301            style: Style::default(),
302        };
303
304        // Store anchor if present
305        if let Some(anchor_name) = anchor {
306            self.anchors.insert(anchor_name, commented_value.clone());
307        }
308
309        Ok(Some(commented_value))
310    }
311
312    /// Compose an alias reference
313    fn compose_alias(
314        &mut self,
315        anchor: String,
316        position: Position,
317    ) -> Result<Option<CommentedValue>> {
318        // Prevent cyclic references
319        if self.alias_expansion_stack.contains(&anchor) {
320            return Err(Error::parse(
321                position,
322                format!("Cyclic alias reference detected: '{}'", anchor),
323            ));
324        }
325
326        self.alias_expansion_stack.push(anchor.clone());
327
328        let result = match self.anchors.get(&anchor) {
329            Some(value) => Ok(Some(value.clone())),
330            None => Err(Error::parse(
331                position,
332                format!("Unknown anchor '{}'", anchor),
333            )),
334        };
335
336        self.alias_expansion_stack.pop();
337        result
338    }
339
340    /// Collect comments from a commented value's comments into inner comments
341    fn collect_item_comments(&self, item: &CommentedValue, inner_comments: &mut Vec<String>) {
342        if item.has_comments() {
343            for leading in &item.comments.leading {
344                inner_comments.push(leading.clone());
345            }
346            if let Some(ref trailing) = item.comments.trailing {
347                inner_comments.push(trailing.clone());
348            }
349        }
350    }
351
352    /// Collect comments from a Comments struct into inner comments
353    fn collect_comments(&self, comments: &Comments, inner_comments: &mut Vec<String>) {
354        if !comments.leading.is_empty() || comments.trailing.is_some() {
355            for leading in &comments.leading {
356                inner_comments.push(leading.clone());
357            }
358            if let Some(ref trailing) = comments.trailing {
359                inner_comments.push(trailing.clone());
360            }
361        }
362    }
363
364    /// Process a merge key by merging values into the current mapping
365    fn process_merge_key(
366        &self,
367        mapping: &mut IndexMap<Value, Value>,
368        merge_value: &Value,
369    ) -> Result<()> {
370        match merge_value {
371            Value::Mapping(source_map) => {
372                for (key, value) in source_map {
373                    mapping.entry(key.clone()).or_insert_with(|| value.clone());
374                }
375            }
376            Value::Sequence(sources) => {
377                for source in sources {
378                    if let Value::Mapping(source_map) = source {
379                        for (key, value) in source_map {
380                            mapping.entry(key.clone()).or_insert_with(|| value.clone());
381                        }
382                    }
383                }
384            }
385            _ => {
386                // Invalid merge value - ignore
387            }
388        }
389        Ok(())
390    }
391}
392
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: a document containing leading, trailing and nested
    /// comments composes without error and yields a value. The collected
    /// comments are printed for inspection; their content is not asserted.
    #[test]
    fn test_comment_preservation() {
        let source = String::from(
            r#"
# Leading comment
key: value  # Trailing comment
# Another comment
nested:
  # Nested comment
  item: data
"#,
        );

        let mut composer = CommentPreservingComposer::new(source);
        let document = composer.compose_document().unwrap();

        assert!(document.is_some());
        if let Some(root) = document {
            // Should have preserved some comments
            println!("Preserved comments: {:?}", root.comments);
        }
    }
}
417}