Skip to main content

yaml_edit/nodes/
scalar_node.rs

1use super::{Lang, SyntaxNode};
2use crate::as_yaml::{AsYaml, YamlKind};
3use crate::lex::SyntaxKind;
4use crate::scalar::ScalarValue;
5use crate::yaml::ValueNode;
6use rowan::ast::AstNode;
7use rowan::GreenNodeBuilder;
8use std::fmt;
9
// Declares the `Scalar` type used throughout this file via the project's
// `ast_node!` macro. NOTE(review): presumably this generates a newtype over
// `SyntaxNode` (the code below accesses it as `self.0`) tied to the `SCALAR`
// syntax kind, with the string literal as its doc comment — confirm against
// the macro definition.
ast_node!(Scalar, SCALAR, "A YAML scalar value");
11
/// How the trailing line breaks of a block scalar are treated.
///
/// Selected by the optional chomping indicator in the block scalar header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Chomping {
    /// `-` indicator: drop every trailing line break
    Strip,
    /// `+` indicator: retain every trailing line break
    Keep,
    /// No indicator (the default): retain exactly one trailing line break
    Clip,
}
22
/// Reasons a [`Scalar`] cannot be converted into a typed value
/// (integer, float or boolean).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ScalarConversionError {
    /// The scalar is written with quotes, which makes it a string in YAML
    /// regardless of what the quoted text looks like
    QuotedValue,
    /// The scalar text is not a valid literal of the requested type; the
    /// payload carries a human-readable explanation
    ParseError(String),
}

impl fmt::Display for ScalarConversionError {
    /// Writes a short, user-facing description of the failure.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::QuotedValue => {
                f.write_str("Cannot convert quoted scalar to numeric/boolean type")
            }
            Self::ParseError(reason) => write!(f, "Failed to parse scalar: {}", reason),
        }
    }
}

// No underlying source error is tracked, so the default `Error` methods suffice.
impl std::error::Error for ScalarConversionError {}
46
47impl Scalar {
48    /// Get the string value of this scalar
49    pub fn value(&self) -> String {
50        self.0.text().to_string()
51    }
52
53    /// Get the string representation of this scalar, properly unquoted and unescaped
54    pub fn as_string(&self) -> String {
55        let text = self.value();
56
57        // Handle quoted strings
58        if text.starts_with('"') && text.ends_with('"') {
59            // Double-quoted string - handle escape sequences
60            ScalarValue::parse_escape_sequences(&text[1..text.len() - 1])
61        } else if text.starts_with('\'') && text.ends_with('\'') {
62            // Single-quoted string - handle '' -> ' escape and fold multi-line strings
63            let content = &text[1..text.len() - 1];
64            let unescaped = content.replace("''", "'");
65            // Only fold lines if actually multi-line
66            if unescaped.contains('\n') {
67                // Fold line breaks (newlines + indentation) to spaces per YAML spec
68                // Using fold() avoids intermediate Vec allocation
69                let mut result = String::new();
70                for (i, line) in unescaped.lines().enumerate() {
71                    if i > 0 {
72                        result.push(' ');
73                    }
74                    result.push_str(line.trim());
75                }
76                result
77            } else {
78                unescaped
79            }
80        } else if text.starts_with('|') || text.starts_with('>') {
81            // Block scalar (literal or folded)
82            Self::parse_block_scalar(&text)
83        } else {
84            // Plain scalar - fold lines if multi-line
85            if text.contains('\n') {
86                // Multi-line plain scalar: fold newlines to spaces
87                // Using manual iteration avoids intermediate Vec allocation
88                let mut result = String::new();
89                let mut first = true;
90                for line in text.lines() {
91                    let trimmed = line.trim();
92                    if !trimmed.is_empty() {
93                        if !first {
94                            result.push(' ');
95                        }
96                        result.push_str(trimmed);
97                        first = false;
98                    }
99                }
100                result
101            } else {
102                text
103            }
104        }
105    }
106
107    /// Parse a block scalar (literal `|` or folded `>`) into its string content
108    fn parse_block_scalar(text: &str) -> String {
109        let mut lines = text.lines();
110        let first_line = match lines.next() {
111            Some(line) => line,
112            None => return String::new(),
113        };
114
115        let is_literal = first_line.starts_with('|');
116
117        // Parse chomping indicator and indentation from header
118        let header = first_line.trim();
119        let chomping = if header.contains('-') {
120            Chomping::Strip
121        } else if header.contains('+') {
122            Chomping::Keep
123        } else {
124            Chomping::Clip
125        };
126
127        // Collect content lines
128        let content_lines: Vec<&str> = lines.collect();
129        if content_lines.is_empty() {
130            return String::new();
131        }
132
133        // Detect base indentation from first non-empty line
134        let base_indent = content_lines
135            .iter()
136            .find(|line| !line.trim().is_empty())
137            .map(|line| line.chars().take_while(|c| *c == ' ').count())
138            .unwrap_or(0);
139
140        // Count trailing empty lines for Keep chomping
141        let trailing_empty_count = content_lines
142            .iter()
143            .rev()
144            .take_while(|line| line.trim().is_empty())
145            .count();
146
147        // Process content
148        let mut result = String::new();
149        let mut prev_was_empty = false;
150        let mut prev_was_more_indented = false;
151
152        for (i, line) in content_lines.iter().enumerate() {
153            if line.trim().is_empty() {
154                // Empty line
155                if is_literal {
156                    // Literal: each line (including empty) gets a newline after it
157                    result.push('\n');
158                } else {
159                    // Folded: empty lines create paragraph breaks (single newline)
160                    if !prev_was_empty && i > 0 {
161                        // Add newline to create paragraph break
162                        result.push('\n');
163                    }
164                }
165                prev_was_empty = true;
166                prev_was_more_indented = false;
167            } else {
168                // Non-empty line - strip base indentation
169                let stripped = if line.len() >= base_indent {
170                    &line[base_indent..]
171                } else {
172                    line.trim_start()
173                };
174
175                if is_literal {
176                    // Literal: each line gets content + newline
177                    result.push_str(stripped);
178                    result.push('\n');
179                    prev_was_more_indented = false;
180                } else {
181                    // Folded: check if line is more indented than base
182                    let line_indent = line.chars().take_while(|c| *c == ' ').count();
183                    let is_more_indented = line_indent > base_indent;
184
185                    if is_more_indented {
186                        // More-indented lines: preserve on their own line with extra indent
187                        if i > 0 && !prev_was_empty && !prev_was_more_indented {
188                            // Only add newline if transitioning from normal to more-indented
189                            result.push('\n');
190                        }
191                        result.push_str(stripped);
192                        result.push('\n');
193                        prev_was_more_indented = true;
194                    } else {
195                        // Normal line: fold with previous unless after empty line or more-indented
196                        if i > 0 {
197                            if prev_was_empty || prev_was_more_indented {
198                                // After paragraph break or more-indented section, don't add space
199                                result.push_str(stripped);
200                            } else {
201                                // Join with space
202                                result.push(' ');
203                                result.push_str(stripped);
204                            }
205                        } else {
206                            // First line
207                            result.push_str(stripped);
208                        }
209                        prev_was_more_indented = false;
210                    }
211                }
212                prev_was_empty = false;
213            }
214        }
215
216        // Apply chomping
217        match chomping {
218            Chomping::Strip => {
219                // Remove all trailing newlines
220                result = result.trim_end_matches('\n').to_string();
221            }
222            Chomping::Clip => {
223                // Keep single trailing newline
224                result = result.trim_end_matches('\n').to_string();
225                result.push('\n');
226            }
227            Chomping::Keep => {
228                // Keep all trailing newlines - preserve the count we detected
229                // Remove all trailing newlines first, then add back the original count
230                result = result.trim_end_matches('\n').to_string();
231                // Add one newline for the content line, plus trailing empties
232                for _ in 0..=trailing_empty_count {
233                    result.push('\n');
234                }
235            }
236        }
237
238        result
239    }
240
241    /// Check if this scalar is quoted
242    pub fn is_quoted(&self) -> bool {
243        let text = self.value();
244        (text.starts_with('"') && text.ends_with('"'))
245            || (text.starts_with('\'') && text.ends_with('\''))
246    }
247
248    /// Get the raw content of this scalar with outer quotes stripped, but
249    /// without processing any escape sequences.
250    ///
251    /// For most purposes [`as_string`](Self::as_string) is more appropriate as
252    /// it fully unescapes double-quoted strings (`\"`, `\\`, `\n`, etc.) and
253    /// handles the `''` → `'` escape in single-quoted strings. Use this method
254    /// only when you need the verbatim content without escape processing.
255    pub fn unquoted_value(&self) -> String {
256        let text = self.value();
257        if self.is_quoted() {
258            text[1..text.len() - 1].to_string()
259        } else {
260            text
261        }
262    }
263}
264
265impl Scalar {
266    /// Replace the text content of this scalar with `value`.
267    ///
268    /// The token is stored with `SyntaxKind::STRING` regardless of the semantic
269    /// type of `value` (e.g., setting `"42"` does not produce an `INT` token).
270    /// If token-kind accuracy matters, build a replacement scalar node via the
271    /// higher-level API instead.
272    pub fn set_value(&self, value: &str) {
273        let children_count = self.0.children_with_tokens().count();
274        // Create a temporary node to wrap the token and extract a SyntaxToken
275        let mut builder = GreenNodeBuilder::new();
276        builder.start_node(SyntaxKind::ROOT.into());
277        builder.token(SyntaxKind::STRING.into(), value);
278        builder.finish_node();
279        let temp_node = SyntaxNode::new_root_mut(builder.finish());
280        let new_token = temp_node
281            .first_token()
282            .expect("builder always emits a STRING token");
283        self.0
284            .splice_children(0..children_count, vec![new_token.into()]);
285    }
286
287    /// Get the byte offset range of this scalar in the source text.
288    ///
289    /// Returns the start and end byte offsets as a `TextPosition`.
290    pub fn byte_range(&self) -> crate::TextPosition {
291        self.0.text_range().into()
292    }
293
294    /// Get the line and column where this scalar starts.
295    ///
296    /// Requires the original source text to calculate line/column from byte offsets.
297    /// Line and column numbers are 1-indexed.
298    ///
299    /// # Arguments
300    ///
301    /// * `source_text` - The original YAML source text
302    pub fn start_position(&self, source_text: &str) -> crate::LineColumn {
303        let range = self.byte_range();
304        crate::byte_offset_to_line_column(source_text, range.start as usize)
305    }
306
307    /// Get the line and column where this scalar ends.
308    ///
309    /// Requires the original source text to calculate line/column from byte offsets.
310    /// Line and column numbers are 1-indexed.
311    ///
312    /// # Arguments
313    ///
314    /// * `source_text` - The original YAML source text
315    pub fn end_position(&self, source_text: &str) -> crate::LineColumn {
316        let range = self.byte_range();
317        crate::byte_offset_to_line_column(source_text, range.end as usize)
318    }
319
320    /// Try to interpret this scalar as an i64.
321    ///
322    /// Returns `None` if the scalar is quoted (string type) or cannot be parsed as an integer.
323    /// Supports decimal, octal (0o), hexadecimal (0x), and binary (0b) notation.
324    pub fn as_i64(&self) -> Option<i64> {
325        TryInto::<i64>::try_into(self).ok()
326    }
327
328    /// Try to interpret this scalar as an f64.
329    ///
330    /// Returns `None` if the scalar is quoted (string type) or cannot be parsed as a float.
331    pub fn as_f64(&self) -> Option<f64> {
332        TryInto::<f64>::try_into(self).ok()
333    }
334
335    /// Try to interpret this scalar as a bool.
336    ///
337    /// Returns `None` if the scalar is quoted (string type) or is not a recognized boolean value.
338    /// Recognizes: true, false, True, False, TRUE, FALSE, yes, no, Yes, No, YES, NO, on, off, On, Off, ON, OFF
339    pub fn as_bool(&self) -> Option<bool> {
340        TryInto::<bool>::try_into(self).ok()
341    }
342
343    /// Check if this scalar represents a null value.
344    ///
345    /// Returns `true` if the unquoted value is null, Null, NULL, ~, or empty.
346    pub fn is_null(&self) -> bool {
347        if self.is_quoted() {
348            return false;
349        }
350        let val = self.as_string();
351        matches!(val.as_str(), "null" | "Null" | "NULL" | "~" | "")
352    }
353}
354
355impl AsYaml for Scalar {
356    fn as_node(&self) -> Option<&SyntaxNode> {
357        Some(&self.0)
358    }
359
360    fn kind(&self) -> YamlKind {
361        YamlKind::Scalar
362    }
363
364    fn build_content(
365        &self,
366        builder: &mut rowan::GreenNodeBuilder,
367        _indent: usize,
368        _flow_context: bool,
369    ) -> bool {
370        crate::as_yaml::copy_node_content(builder, &self.0);
371        // Scalars don't end with newlines
372        false
373    }
374
375    fn is_inline(&self) -> bool {
376        ValueNode::is_inline(self)
377    }
378}
379
380// TryFrom implementations for typed access
381impl TryFrom<&Scalar> for i64 {
382    type Error = ScalarConversionError;
383
384    fn try_from(scalar: &Scalar) -> Result<Self, Self::Error> {
385        if scalar.is_quoted() {
386            return Err(ScalarConversionError::QuotedValue);
387        }
388
389        let value = scalar.as_string();
390
391        // Handle different number formats
392        if let Some(hex) = value
393            .strip_prefix("0x")
394            .or_else(|| value.strip_prefix("0X"))
395        {
396            i64::from_str_radix(hex, 16)
397                .map_err(|e| ScalarConversionError::ParseError(e.to_string()))
398        } else if let Some(octal) = value
399            .strip_prefix("0o")
400            .or_else(|| value.strip_prefix("0O"))
401        {
402            i64::from_str_radix(octal, 8)
403                .map_err(|e| ScalarConversionError::ParseError(e.to_string()))
404        } else if let Some(binary) = value
405            .strip_prefix("0b")
406            .or_else(|| value.strip_prefix("0B"))
407        {
408            i64::from_str_radix(binary, 2)
409                .map_err(|e| ScalarConversionError::ParseError(e.to_string()))
410        } else {
411            value
412                .parse::<i64>()
413                .map_err(|e| ScalarConversionError::ParseError(e.to_string()))
414        }
415    }
416}
417
418impl TryFrom<&Scalar> for f64 {
419    type Error = ScalarConversionError;
420
421    fn try_from(scalar: &Scalar) -> Result<Self, Self::Error> {
422        if scalar.is_quoted() {
423            return Err(ScalarConversionError::QuotedValue);
424        }
425
426        let value = scalar.as_string();
427
428        // Handle special float values
429        match value.as_str() {
430            ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Ok(f64::INFINITY),
431            "-.inf" | "-.Inf" | "-.INF" => Ok(f64::NEG_INFINITY),
432            ".nan" | ".NaN" | ".NAN" => Ok(f64::NAN),
433            _ => value
434                .parse::<f64>()
435                .map_err(|e| ScalarConversionError::ParseError(e.to_string())),
436        }
437    }
438}
439
440impl TryFrom<&Scalar> for bool {
441    type Error = ScalarConversionError;
442
443    fn try_from(scalar: &Scalar) -> Result<Self, Self::Error> {
444        if scalar.is_quoted() {
445            return Err(ScalarConversionError::QuotedValue);
446        }
447
448        let value = scalar.as_string();
449
450        // YAML 1.2 Core Schema boolean values
451        match value.as_str() {
452            "true" | "True" | "TRUE" => Ok(true),
453            "false" | "False" | "FALSE" => Ok(false),
454            // YAML 1.1 compatibility (commonly used)
455            "yes" | "Yes" | "YES" | "on" | "On" | "ON" => Ok(true),
456            "no" | "No" | "NO" | "off" | "Off" | "OFF" => Ok(false),
457            _ => Err(ScalarConversionError::ParseError(format!(
458                "'{}' is not a recognized boolean value",
459                value
460            ))),
461        }
462    }
463}
464
#[cfg(test)]
mod tests {
    use crate::Document;
    use std::str::FromStr;

    /// Regression test for quoted strings inside a multi-line flow sequence
    /// (a pretty-printed JSON array).
    ///
    /// The parser used to attach the NEWLINE and INDENT tokens that follow a
    /// quoted string to the SCALAR node as children instead of leaving them
    /// as siblings, so the scalar text picked up trailing whitespace.
    #[test]
    fn test_json_array_quoted_strings_cst_structure() {
        let source = r#"{
  "items": [
    "first",
    "second"
  ]
}"#;

        let doc = Document::from_str(source).unwrap();
        let root = doc.as_mapping().unwrap();
        let items = root.get("items").unwrap();
        let seq = items.as_sequence().unwrap();

        // Keep only the scalar entries of the sequence
        let scalars: Vec<_> = seq
            .values()
            .filter_map(|node| match node {
                crate::YamlNode::Scalar(scalar) => Some(scalar),
                _ => None,
            })
            .collect();

        assert_eq!(scalars.len(), 2);

        // The raw text must be exactly the quoted string, nothing more
        assert_eq!(
            scalars[0].value(),
            r#""first""#,
            "first item should not have trailing whitespace"
        );
        assert_eq!(
            scalars[1].value(),
            r#""second""#,
            "second item should not have trailing whitespace"
        );

        // Decoding removes the quotes
        assert_eq!(scalars[0].as_string(), "first");
        assert_eq!(scalars[1].as_string(), "second");
    }

    /// Same checks for a JSON document written on a single line.
    #[test]
    fn test_compact_json_array() {
        let source = r#"{"items": ["first", "second"]}"#;

        let doc = Document::from_str(source).unwrap();
        let root = doc.as_mapping().unwrap();
        let items = root.get("items").unwrap();
        let seq = items.as_sequence().unwrap();

        let scalars: Vec<_> = seq
            .values()
            .filter_map(|node| match node {
                crate::YamlNode::Scalar(scalar) => Some(scalar),
                _ => None,
            })
            .collect();

        assert_eq!(scalars.len(), 2);
        assert_eq!(scalars[0].value(), r#""first""#);
        assert_eq!(scalars[1].value(), r#""second""#);
        assert_eq!(scalars[0].as_string(), "first");
        assert_eq!(scalars[1].as_string(), "second");
    }

    /// Same checks for a YAML mapping whose value is a flow-style sequence.
    #[test]
    fn test_yaml_flow_arrays_quoted_strings() {
        let source = r#"
items: ["first", "second"]
"#;

        let doc = Document::from_str(source).unwrap();
        let root = doc.as_mapping().unwrap();
        let items = root.get("items").unwrap();
        let seq = items.as_sequence().unwrap();

        let scalars: Vec<_> = seq
            .values()
            .filter_map(|node| match node {
                crate::YamlNode::Scalar(scalar) => Some(scalar),
                _ => None,
            })
            .collect();

        assert_eq!(scalars.len(), 2);
        assert_eq!(scalars[0].value(), r#""first""#);
        assert_eq!(scalars[1].value(), r#""second""#);
        assert_eq!(scalars[0].as_string(), "first");
        assert_eq!(scalars[1].as_string(), "second");
    }
}