Skip to main content

yaml_edit/nodes/
scalar_node.rs

1use super::{Lang, SyntaxNode};
2use crate::as_yaml::{AsYaml, YamlKind};
3use crate::lex::SyntaxKind;
4use crate::scalar::ScalarValue;
5use crate::yaml::ValueNode;
6use rowan::ast::AstNode;
7use rowan::GreenNodeBuilder;
8use std::fmt;
9
// Declares the `Scalar` AST type for `SCALAR` nodes through the project's
// `ast_node!` macro. The impls in this file reach the wrapped syntax node as
// `self.0`.
ast_node!(Scalar, SCALAR, "A YAML scalar value");
11
/// How the trailing line breaks of a block scalar are treated, as selected by
/// the chomping indicator in the block scalar header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Chomping {
    /// `-` indicator: drop every trailing line break.
    Strip,
    /// `+` indicator: retain every trailing line break.
    Keep,
    /// No indicator (the default): leave exactly one trailing line break.
    Clip,
}
22
/// Reasons a scalar could not be converted to a typed value (`i64`, `f64`,
/// `bool`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ScalarConversionError {
    /// The scalar is written in quotes, which marks it as a string in YAML,
    /// so no numeric/boolean interpretation is attempted.
    QuotedValue,
    /// The scalar text could not be parsed as the requested type; the payload
    /// carries the parser's message.
    ParseError(String),
}
31
32impl fmt::Display for ScalarConversionError {
33    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
34        match self {
35            ScalarConversionError::QuotedValue => {
36                write!(f, "Cannot convert quoted scalar to numeric/boolean type")
37            }
38            ScalarConversionError::ParseError(msg) => {
39                write!(f, "Failed to parse scalar: {}", msg)
40            }
41        }
42    }
43}
44
// Marker impl: relies on the trait's default methods, with the derived `Debug`
// and the hand-written `Display` for this type satisfying the supertraits.
impl std::error::Error for ScalarConversionError {}
46
47impl Scalar {
48    /// Get the string value of this scalar
49    pub fn value(&self) -> String {
50        self.0.text().to_string()
51    }
52
53    /// Get the string representation of this scalar, properly unquoted and unescaped
54    pub fn as_string(&self) -> String {
55        let text = self.value();
56
57        // Handle quoted strings
58        if text.starts_with('"') && text.ends_with('"') {
59            // Double-quoted string - handle escape sequences
60            ScalarValue::parse_escape_sequences(&text[1..text.len() - 1])
61        } else if text.starts_with('\'') && text.ends_with('\'') {
62            // Single-quoted string - handle '' -> ' escape and fold multi-line strings
63            let content = &text[1..text.len() - 1];
64            let unescaped = content.replace("''", "'");
65            // Only fold lines if actually multi-line
66            if unescaped.contains('\n') {
67                // Fold line breaks (newlines + indentation) to spaces per YAML spec
68                // Using fold() avoids intermediate Vec allocation
69                let mut result = String::new();
70                for (i, line) in unescaped.lines().enumerate() {
71                    if i > 0 {
72                        result.push(' ');
73                    }
74                    result.push_str(line.trim());
75                }
76                result
77            } else {
78                unescaped
79            }
80        } else if text.starts_with('|') || text.starts_with('>') {
81            // Block scalar (literal or folded)
82            Self::parse_block_scalar(&text)
83        } else {
84            // Plain scalar - fold lines if multi-line
85            if text.contains('\n') {
86                // Multi-line plain scalar: fold newlines to spaces
87                // Using manual iteration avoids intermediate Vec allocation
88                let mut result = String::new();
89                let mut first = true;
90                for line in text.lines() {
91                    let trimmed = line.trim();
92                    if !trimmed.is_empty() {
93                        if !first {
94                            result.push(' ');
95                        }
96                        result.push_str(trimmed);
97                        first = false;
98                    }
99                }
100                result
101            } else {
102                text
103            }
104        }
105    }
106
107    /// Parse a block scalar (literal `|` or folded `>`) into its string content
108    fn parse_block_scalar(text: &str) -> String {
109        let mut lines = text.lines();
110        let first_line = match lines.next() {
111            Some(line) => line,
112            None => return String::new(),
113        };
114
115        let is_literal = first_line.starts_with('|');
116
117        // Parse chomping indicator and indentation from header
118        let header = first_line.trim();
119        let chomping = if header.contains('-') {
120            Chomping::Strip
121        } else if header.contains('+') {
122            Chomping::Keep
123        } else {
124            Chomping::Clip
125        };
126
127        // Collect content lines
128        let content_lines: Vec<&str> = lines.collect();
129        if content_lines.is_empty() {
130            return String::new();
131        }
132
133        // Detect base indentation from first non-empty line
134        let base_indent = content_lines
135            .iter()
136            .find(|line| !line.trim().is_empty())
137            .map(|line| line.chars().take_while(|c| *c == ' ').count())
138            .unwrap_or(0);
139
140        // Count trailing empty lines for Keep chomping
141        let trailing_empty_count = content_lines
142            .iter()
143            .rev()
144            .take_while(|line| line.trim().is_empty())
145            .count();
146
147        // Process content
148        let mut result = String::new();
149        let mut prev_was_empty = false;
150        let mut prev_was_more_indented = false;
151
152        for (i, line) in content_lines.iter().enumerate() {
153            if line.trim().is_empty() {
154                // Empty line
155                if is_literal {
156                    // Literal: each line (including empty) gets a newline after it
157                    result.push('\n');
158                } else {
159                    // Folded: empty lines create paragraph breaks (single newline)
160                    if !prev_was_empty && i > 0 {
161                        // Add newline to create paragraph break
162                        result.push('\n');
163                    }
164                }
165                prev_was_empty = true;
166                prev_was_more_indented = false;
167            } else {
168                // Non-empty line - strip up to `base_indent` leading spaces.
169                // base_indent is a character count, so we step by chars to
170                // stay on UTF-8 boundaries even if the line starts with
171                // multi-byte content at less than base_indent spaces of
172                // indentation.
173                let leading_spaces = line.chars().take_while(|c| *c == ' ').count();
174                let strip = leading_spaces.min(base_indent);
175                let strip_bytes = line
176                    .char_indices()
177                    .nth(strip)
178                    .map(|(i, _)| i)
179                    .unwrap_or(line.len());
180                let stripped = &line[strip_bytes..];
181
182                if is_literal {
183                    // Literal: each line gets content + newline
184                    result.push_str(stripped);
185                    result.push('\n');
186                    prev_was_more_indented = false;
187                } else {
188                    // Folded: check if line is more indented than base
189                    let line_indent = line.chars().take_while(|c| *c == ' ').count();
190                    let is_more_indented = line_indent > base_indent;
191
192                    if is_more_indented {
193                        // More-indented lines: preserve on their own line with extra indent
194                        if i > 0 && !prev_was_empty && !prev_was_more_indented {
195                            // Only add newline if transitioning from normal to more-indented
196                            result.push('\n');
197                        }
198                        result.push_str(stripped);
199                        result.push('\n');
200                        prev_was_more_indented = true;
201                    } else {
202                        // Normal line: fold with previous unless after empty line or more-indented
203                        if i > 0 {
204                            if prev_was_empty || prev_was_more_indented {
205                                // After paragraph break or more-indented section, don't add space
206                                result.push_str(stripped);
207                            } else {
208                                // Join with space
209                                result.push(' ');
210                                result.push_str(stripped);
211                            }
212                        } else {
213                            // First line
214                            result.push_str(stripped);
215                        }
216                        prev_was_more_indented = false;
217                    }
218                }
219                prev_was_empty = false;
220            }
221        }
222
223        // Apply chomping
224        match chomping {
225            Chomping::Strip => {
226                // Remove all trailing newlines
227                result = result.trim_end_matches('\n').to_string();
228            }
229            Chomping::Clip => {
230                // Keep single trailing newline
231                result = result.trim_end_matches('\n').to_string();
232                result.push('\n');
233            }
234            Chomping::Keep => {
235                // Keep all trailing newlines - preserve the count we detected
236                // Remove all trailing newlines first, then add back the original count
237                result = result.trim_end_matches('\n').to_string();
238                // Add one newline for the content line, plus trailing empties
239                for _ in 0..=trailing_empty_count {
240                    result.push('\n');
241                }
242            }
243        }
244
245        result
246    }
247
248    /// Check if this scalar is quoted
249    pub fn is_quoted(&self) -> bool {
250        let text = self.value();
251        (text.starts_with('"') && text.ends_with('"'))
252            || (text.starts_with('\'') && text.ends_with('\''))
253    }
254
255    /// Get the raw content of this scalar with outer quotes stripped, but
256    /// without processing any escape sequences.
257    ///
258    /// For most purposes [`as_string`](Self::as_string) is more appropriate as
259    /// it fully unescapes double-quoted strings (`\"`, `\\`, `\n`, etc.) and
260    /// handles the `''` → `'` escape in single-quoted strings. Use this method
261    /// only when you need the verbatim content without escape processing.
262    pub fn unquoted_value(&self) -> String {
263        let text = self.value();
264        if self.is_quoted() {
265            text[1..text.len() - 1].to_string()
266        } else {
267            text
268        }
269    }
270}
271
272impl Scalar {
273    /// Replace the text content of this scalar with `value`.
274    ///
275    /// The token is stored with `SyntaxKind::STRING` regardless of the semantic
276    /// type of `value` (e.g., setting `"42"` does not produce an `INT` token).
277    /// If token-kind accuracy matters, build a replacement scalar node via the
278    /// higher-level API instead.
279    pub fn set_value(&self, value: &str) {
280        let children_count = self.0.children_with_tokens().count();
281        // Create a temporary node to wrap the token and extract a SyntaxToken
282        let mut builder = GreenNodeBuilder::new();
283        builder.start_node(SyntaxKind::ROOT.into());
284        builder.token(SyntaxKind::STRING.into(), value);
285        builder.finish_node();
286        let temp_node = SyntaxNode::new_root_mut(builder.finish());
287        let new_token = temp_node
288            .first_token()
289            .expect("builder always emits a STRING token");
290        self.0
291            .splice_children(0..children_count, vec![new_token.into()]);
292    }
293
294    /// Get the byte offset range of this scalar in the source text.
295    ///
296    /// Returns the start and end byte offsets as a `TextPosition`.
297    pub fn byte_range(&self) -> crate::TextPosition {
298        self.0.text_range().into()
299    }
300
301    /// Get the line and column where this scalar starts.
302    ///
303    /// Requires the original source text to calculate line/column from byte offsets.
304    /// Line and column numbers are 1-indexed.
305    ///
306    /// # Arguments
307    ///
308    /// * `source_text` - The original YAML source text
309    pub fn start_position(&self, source_text: &str) -> crate::LineColumn {
310        let range = self.byte_range();
311        crate::byte_offset_to_line_column(source_text, range.start as usize)
312    }
313
314    /// Get the line and column where this scalar ends.
315    ///
316    /// Requires the original source text to calculate line/column from byte offsets.
317    /// Line and column numbers are 1-indexed.
318    ///
319    /// # Arguments
320    ///
321    /// * `source_text` - The original YAML source text
322    pub fn end_position(&self, source_text: &str) -> crate::LineColumn {
323        let range = self.byte_range();
324        crate::byte_offset_to_line_column(source_text, range.end as usize)
325    }
326
327    /// Try to interpret this scalar as an i64.
328    ///
329    /// Returns `None` if the scalar is quoted (string type) or cannot be parsed as an integer.
330    /// Supports decimal, octal (0o), hexadecimal (0x), and binary (0b) notation.
331    pub fn as_i64(&self) -> Option<i64> {
332        TryInto::<i64>::try_into(self).ok()
333    }
334
335    /// Try to interpret this scalar as an f64.
336    ///
337    /// Returns `None` if the scalar is quoted (string type) or cannot be parsed as a float.
338    pub fn as_f64(&self) -> Option<f64> {
339        TryInto::<f64>::try_into(self).ok()
340    }
341
342    /// Try to interpret this scalar as a bool.
343    ///
344    /// Returns `None` if the scalar is quoted (string type) or is not a recognized boolean value.
345    /// Recognizes: true, false, True, False, TRUE, FALSE, yes, no, Yes, No, YES, NO, on, off, On, Off, ON, OFF
346    pub fn as_bool(&self) -> Option<bool> {
347        TryInto::<bool>::try_into(self).ok()
348    }
349
350    /// Check if this scalar represents a null value.
351    ///
352    /// Returns `true` if the unquoted value is null, Null, NULL, ~, or empty.
353    pub fn is_null(&self) -> bool {
354        if self.is_quoted() {
355            return false;
356        }
357        let val = self.as_string();
358        matches!(val.as_str(), "null" | "Null" | "NULL" | "~" | "")
359    }
360}
361
/// `AsYaml` integration for scalars that already live in a syntax tree.
impl AsYaml for Scalar {
    /// Expose the wrapped syntax node; always `Some` for a `Scalar`.
    fn as_node(&self) -> Option<&SyntaxNode> {
        Some(&self.0)
    }

    /// A scalar always reports `YamlKind::Scalar`.
    fn kind(&self) -> YamlKind {
        YamlKind::Scalar
    }

    /// Emit this scalar into `builder` by copying the content of the existing
    /// node with `copy_node_content`.
    ///
    /// `_indent` and `_flow_context` are not used. The return value is always
    /// `false`, which the inline note below ties to scalars not ending with a
    /// newline.
    fn build_content(
        &self,
        builder: &mut rowan::GreenNodeBuilder,
        _indent: usize,
        _flow_context: bool,
    ) -> bool {
        crate::as_yaml::copy_node_content(builder, &self.0);
        // Scalars don't end with newlines
        false
    }

    /// Delegate to `ValueNode::is_inline`. The call is written in qualified
    /// form so it names `ValueNode`'s function explicitly rather than this
    /// method of the same name.
    fn is_inline(&self) -> bool {
        ValueNode::is_inline(self)
    }
}
386
387// TryFrom implementations for typed access
388impl TryFrom<&Scalar> for i64 {
389    type Error = ScalarConversionError;
390
391    fn try_from(scalar: &Scalar) -> Result<Self, Self::Error> {
392        if scalar.is_quoted() {
393            return Err(ScalarConversionError::QuotedValue);
394        }
395
396        let value = scalar.as_string();
397
398        // Handle different number formats
399        if let Some(hex) = value
400            .strip_prefix("0x")
401            .or_else(|| value.strip_prefix("0X"))
402        {
403            i64::from_str_radix(hex, 16)
404                .map_err(|e| ScalarConversionError::ParseError(e.to_string()))
405        } else if let Some(octal) = value
406            .strip_prefix("0o")
407            .or_else(|| value.strip_prefix("0O"))
408        {
409            i64::from_str_radix(octal, 8)
410                .map_err(|e| ScalarConversionError::ParseError(e.to_string()))
411        } else if let Some(binary) = value
412            .strip_prefix("0b")
413            .or_else(|| value.strip_prefix("0B"))
414        {
415            i64::from_str_radix(binary, 2)
416                .map_err(|e| ScalarConversionError::ParseError(e.to_string()))
417        } else {
418            value
419                .parse::<i64>()
420                .map_err(|e| ScalarConversionError::ParseError(e.to_string()))
421        }
422    }
423}
424
425impl TryFrom<&Scalar> for f64 {
426    type Error = ScalarConversionError;
427
428    fn try_from(scalar: &Scalar) -> Result<Self, Self::Error> {
429        if scalar.is_quoted() {
430            return Err(ScalarConversionError::QuotedValue);
431        }
432
433        let value = scalar.as_string();
434
435        // Handle special float values
436        match value.as_str() {
437            ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Ok(f64::INFINITY),
438            "-.inf" | "-.Inf" | "-.INF" => Ok(f64::NEG_INFINITY),
439            ".nan" | ".NaN" | ".NAN" => Ok(f64::NAN),
440            _ => value
441                .parse::<f64>()
442                .map_err(|e| ScalarConversionError::ParseError(e.to_string())),
443        }
444    }
445}
446
447impl TryFrom<&Scalar> for bool {
448    type Error = ScalarConversionError;
449
450    fn try_from(scalar: &Scalar) -> Result<Self, Self::Error> {
451        if scalar.is_quoted() {
452            return Err(ScalarConversionError::QuotedValue);
453        }
454
455        let value = scalar.as_string();
456
457        // YAML 1.2 Core Schema boolean values
458        match value.as_str() {
459            "true" | "True" | "TRUE" => Ok(true),
460            "false" | "False" | "FALSE" => Ok(false),
461            // YAML 1.1 compatibility (commonly used)
462            "yes" | "Yes" | "YES" | "on" | "On" | "ON" => Ok(true),
463            "no" | "No" | "NO" | "off" | "Off" | "OFF" => Ok(false),
464            _ => Err(ScalarConversionError::ParseError(format!(
465                "'{}' is not a recognized boolean value",
466                value
467            ))),
468        }
469    }
470}
471
472#[cfg(test)]
473mod tests {
474    use crate::Document;
475    use std::str::FromStr;
476
477    #[test]
478    fn test_json_array_quoted_strings_cst_structure() {
479        // This test verifies that quoted strings in flow sequences (JSON arrays)
480        // don't incorrectly consume trailing whitespace into the SCALAR node.
481        //
482        // The bug was that the parser would include NEWLINE and INDENT tokens
483        // as children of the SCALAR node instead of as siblings.
484
485        let json = r#"{
486  "items": [
487    "first",
488    "second"
489  ]
490}"#;
491
492        let doc = Document::from_str(json).unwrap();
493        let mapping = doc.as_mapping().unwrap();
494        let items = mapping.get("items").unwrap();
495        let sequence = items.as_sequence().unwrap();
496
497        // Get the scalars
498        let values: Vec<_> = sequence
499            .values()
500            .filter_map(|node| {
501                if let crate::YamlNode::Scalar(scalar) = node {
502                    Some(scalar)
503                } else {
504                    None
505                }
506            })
507            .collect();
508
509        assert_eq!(values.len(), 2);
510
511        // Both values should be clean quoted strings without trailing whitespace
512        assert_eq!(
513            values[0].value(),
514            r#""first""#,
515            "first item should not have trailing whitespace"
516        );
517        assert_eq!(
518            values[1].value(),
519            r#""second""#,
520            "second item should not have trailing whitespace"
521        );
522
523        // as_string() should correctly unquote
524        assert_eq!(values[0].as_string(), "first");
525        assert_eq!(values[1].as_string(), "second");
526    }
527
528    #[test]
529    fn test_compact_json_array() {
530        // Compact JSON should also work correctly
531        let json = r#"{"items": ["first", "second"]}"#;
532
533        let doc = Document::from_str(json).unwrap();
534        let mapping = doc.as_mapping().unwrap();
535        let items = mapping.get("items").unwrap();
536        let sequence = items.as_sequence().unwrap();
537
538        let values: Vec<_> = sequence
539            .values()
540            .filter_map(|node| {
541                if let crate::YamlNode::Scalar(scalar) = node {
542                    Some(scalar)
543                } else {
544                    None
545                }
546            })
547            .collect();
548
549        assert_eq!(values.len(), 2);
550        assert_eq!(values[0].value(), r#""first""#);
551        assert_eq!(values[1].value(), r#""second""#);
552        assert_eq!(values[0].as_string(), "first");
553        assert_eq!(values[1].as_string(), "second");
554    }
555
556    #[test]
557    fn test_yaml_flow_arrays_quoted_strings() {
558        // YAML flow-style arrays should behave the same
559        let yaml = r#"
560items: ["first", "second"]
561"#;
562
563        let doc = Document::from_str(yaml).unwrap();
564        let mapping = doc.as_mapping().unwrap();
565        let items = mapping.get("items").unwrap();
566        let sequence = items.as_sequence().unwrap();
567
568        let values: Vec<_> = sequence
569            .values()
570            .filter_map(|node| {
571                if let crate::YamlNode::Scalar(scalar) = node {
572                    Some(scalar)
573                } else {
574                    None
575                }
576            })
577            .collect();
578
579        assert_eq!(values.len(), 2);
580        assert_eq!(values[0].value(), r#""first""#);
581        assert_eq!(values[1].value(), r#""second""#);
582        assert_eq!(values[0].as_string(), "first");
583        assert_eq!(values[1].as_string(), "second");
584    }
585
586    #[test]
587    fn test_parse_block_scalar_multibyte_after_dedent() {
588        // Regression: `base_indent` is a char count but we sliced bytes,
589        // which panicked when a continuation line started with a multi-byte
590        // character at less than `base_indent` spaces of indentation.
591        let yaml = ">\n  a\n\u{4f1}b\n";
592        // We only care that it does not panic.
593        let _ = super::Scalar::parse_block_scalar(yaml);
594    }
595}