Skip to main content

yaml_edit/
scalar.rs

1//! Scalar value wrapper with proper escaping and style support.
2
3use std::fmt;
4
5#[cfg(feature = "base64")]
6use base64::{engine::general_purpose, Engine as _};
7
/// Encode raw bytes as a standard-alphabet base64 string.
///
/// Used to produce the textual payload of binary scalars.
#[cfg(feature = "base64")]
fn base64_encode(input: &[u8]) -> String {
    let engine = &general_purpose::STANDARD;
    engine.encode(input)
}
13
/// Decode a standard-alphabet base64 string into raw bytes.
///
/// Surrounding whitespace is trimmed before decoding. On failure the
/// decoder's error is rendered into a human-readable message.
#[cfg(feature = "base64")]
fn base64_decode(input: &str) -> Result<Vec<u8>, String> {
    let trimmed = input.trim();
    match general_purpose::STANDARD.decode(trimmed) {
        Ok(bytes) => Ok(bytes),
        Err(e) => Err(format!("Base64 decode error: {}", e)),
    }
}
21
/// How a scalar is written out in YAML text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ScalarStyle {
    /// Unquoted scalar, e.g. `value`
    Plain,
    /// Scalar wrapped in single quotes, e.g. `'value'`
    SingleQuoted,
    /// Scalar wrapped in double quotes, e.g. `"value"`
    DoubleQuoted,
    /// Block scalar introduced by `|`
    Literal,
    /// Block scalar introduced by `>`
    Folded,
}
36
/// The data type a scalar value is tagged with.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ScalarType {
    /// Text
    String,
    /// Whole number
    Integer,
    /// Floating-point number
    Float,
    /// `true` / `false` style value
    Boolean,
    /// Absent value
    Null,
    /// Binary payload, stored base64 encoded (requires the `base64` feature)
    #[cfg(feature = "base64")]
    Binary,
    /// Point in time
    Timestamp,
    /// Regular expression pattern
    Regex,
}
58
59/// A scalar value with metadata about its style and content
60#[derive(Debug, Clone, PartialEq, Eq)]
61pub struct ScalarValue {
62    /// The actual value
63    value: String,
64    /// The style to use when rendering
65    style: ScalarStyle,
66    /// The type of the scalar
67    scalar_type: ScalarType,
68}
69
70impl ScalarValue {
71    /// Create a scalar value explicitly treating it as a string (no type auto-detection)
72    ///
73    /// This method always creates a `String` type scalar. The value will be properly
74    /// quoted if needed when rendering to YAML, but no type detection is performed.
75    ///
76    /// # Examples
77    ///
78    /// ```
79    /// use yaml_edit::ScalarValue;
80    ///
81    /// let scalar = ScalarValue::string("123");
82    /// assert_eq!(scalar.scalar_type(), yaml_edit::ScalarType::String);
83    /// // Renders with quotes to distinguish from integer
84    /// assert_eq!(scalar.to_yaml_string(), "'123'");
85    ///
86    /// let scalar = ScalarValue::string("true");
87    /// assert_eq!(scalar.scalar_type(), yaml_edit::ScalarType::String);
88    /// // Renders with quotes to distinguish from boolean
89    /// assert_eq!(scalar.to_yaml_string(), "'true'");
90    ///
91    /// let scalar = ScalarValue::string("hello");
92    /// assert_eq!(scalar.scalar_type(), yaml_edit::ScalarType::String);
93    /// // Plain strings don't need quotes
94    /// assert_eq!(scalar.to_yaml_string(), "hello");
95    /// ```
96    ///
97    /// For YAML-style type detection (parsing "123" as Integer, "true" as Boolean),
98    /// use [`ScalarValue::parse()`] instead.
99    pub fn string(value: impl Into<String>) -> Self {
100        let value = value.into();
101        let style = Self::detect_style(&value);
102        // Detect the type - default to String for user-provided values
103        let scalar_type = ScalarType::String;
104        Self {
105            value,
106            style,
107            scalar_type,
108        }
109    }
110
111    /// Parse escape sequences in a double-quoted string
112    pub fn parse_escape_sequences(text: &str) -> String {
113        let mut result = String::with_capacity(text.len()); // Pre-allocate
114        let mut chars = text.chars().peekable();
115
116        while let Some(ch) = chars.next() {
117            if ch == '\\' {
118                if let Some(&escaped) = chars.peek() {
119                    chars.next(); // consume the escaped character
120                    match escaped {
121                        // Standard escape sequences
122                        'n' => result.push('\n'),
123                        't' => result.push('\t'),
124                        'r' => result.push('\r'),
125                        'b' => result.push('\x08'),
126                        'f' => result.push('\x0C'),
127                        'a' => result.push('\x07'), // bell
128                        'e' => result.push('\x1B'), // escape
129                        'v' => result.push('\x0B'), // vertical tab
130                        '0' => result.push('\0'),   // null
131                        '\\' => result.push('\\'),
132                        '"' => result.push('"'),
133                        '\'' => result.push('\''),
134                        '/' => result.push('/'),
135                        // Line break escape (YAML specific)
136                        ' ' => {
137                            // Escaped space followed by line break - line folding
138                            if let Some(&'\n') = chars.peek() {
139                                chars.next(); // consume the newline
140                                              // In YAML, escaped line breaks are folded to nothing
141                                continue;
142                            } else {
143                                result.push(' ');
144                            }
145                        }
146                        '\n' => {
147                            // Escaped line break - removes the line break
148                            continue;
149                        }
150                        // Unicode escapes
151                        'x' => {
152                            // \xNN - 2-digit hex
153                            let mut hex_chars = [0u8; 2];
154                            let mut count = 0;
155                            for (i, ch) in chars.by_ref().take(2).enumerate() {
156                                if let Some(digit) = ch.to_digit(16) {
157                                    hex_chars[i] = digit as u8;
158                                    count += 1;
159                                } else {
160                                    // Put back invalid char
161                                    result.push('\\');
162                                    result.push('x');
163                                    for &hex_char in hex_chars.iter().take(count) {
164                                        result.push(char::from_digit(hex_char as u32, 16).unwrap());
165                                    }
166                                    result.push(ch);
167                                    break;
168                                }
169                            }
170                            if count == 2 {
171                                let code = hex_chars[0] * 16 + hex_chars[1];
172                                result.push(code as char);
173                            } else if count > 0 {
174                                // Incomplete hex escape
175                                result.push('\\');
176                                result.push('x');
177                                for &hex_char in hex_chars.iter().take(count) {
178                                    result.push(char::from_digit(hex_char as u32, 16).unwrap());
179                                }
180                            }
181                        }
182                        'u' => {
183                            // \uNNNN - 4-digit hex
184                            let hex_digits: String = chars.by_ref().take(4).collect();
185                            if hex_digits.len() == 4 {
186                                if let Ok(code) = u16::from_str_radix(&hex_digits, 16) {
187                                    if let Some(unicode_char) = char::from_u32(code as u32) {
188                                        result.push(unicode_char);
189                                    } else {
190                                        // Invalid Unicode code point
191                                        result.push('\\');
192                                        result.push('u');
193                                        result.push_str(&hex_digits);
194                                    }
195                                } else {
196                                    // Invalid hex
197                                    result.push('\\');
198                                    result.push('u');
199                                    result.push_str(&hex_digits);
200                                }
201                            } else {
202                                // Incomplete hex escape
203                                result.push('\\');
204                                result.push('u');
205                                result.push_str(&hex_digits);
206                            }
207                        }
208                        'U' => {
209                            // \UNNNNNNNN - 8-digit hex
210                            let hex_digits: String = chars.by_ref().take(8).collect();
211                            if hex_digits.len() == 8 {
212                                if let Ok(code) = u32::from_str_radix(&hex_digits, 16) {
213                                    if let Some(unicode_char) = char::from_u32(code) {
214                                        result.push(unicode_char);
215                                    } else {
216                                        // Invalid Unicode code point
217                                        result.push('\\');
218                                        result.push('U');
219                                        result.push_str(&hex_digits);
220                                    }
221                                } else {
222                                    // Invalid hex
223                                    result.push('\\');
224                                    result.push('U');
225                                    result.push_str(&hex_digits);
226                                }
227                            } else {
228                                // Incomplete hex escape
229                                result.push('\\');
230                                result.push('U');
231                                result.push_str(&hex_digits);
232                            }
233                        }
234                        // Unknown escape sequence - preserve as literal
235                        _ => {
236                            result.push('\\');
237                            result.push(escaped);
238                        }
239                    }
240                } else {
241                    // Backslash at end of string
242                    result.push('\\');
243                }
244            } else {
245                result.push(ch);
246            }
247        }
248
249        result
250    }
251
252    /// Create a new scalar with a specific style
253    pub fn with_style(value: impl Into<String>, style: ScalarStyle) -> Self {
254        Self {
255            value: value.into(),
256            style,
257            scalar_type: ScalarType::String,
258        }
259    }
260
261    /// Create a plain scalar
262    pub fn plain(value: impl Into<String>) -> Self {
263        Self::with_style(value, ScalarStyle::Plain)
264    }
265
266    /// Create a single-quoted scalar
267    pub fn single_quoted(value: impl Into<String>) -> Self {
268        Self::with_style(value, ScalarStyle::SingleQuoted)
269    }
270
271    /// Create a double-quoted scalar
272    pub fn double_quoted(value: impl Into<String>) -> Self {
273        Self::with_style(value, ScalarStyle::DoubleQuoted)
274    }
275
276    /// Create a literal scalar
277    pub fn literal(value: impl Into<String>) -> Self {
278        Self::with_style(value, ScalarStyle::Literal)
279    }
280
281    /// Create a folded scalar
282    pub fn folded(value: impl Into<String>) -> Self {
283        Self::with_style(value, ScalarStyle::Folded)
284    }
285
286    /// Create a null scalar
287    pub fn null() -> Self {
288        Self {
289            value: "null".to_string(),
290            style: ScalarStyle::Plain,
291            scalar_type: ScalarType::Null,
292        }
293    }
294
295    /// Create a binary scalar from raw bytes
296    #[cfg(feature = "base64")]
297    pub fn binary(data: &[u8]) -> Self {
298        let encoded = base64_encode(data);
299        Self {
300            value: encoded,
301            style: ScalarStyle::Plain,
302            scalar_type: ScalarType::Binary,
303        }
304    }
305
306    /// Create a timestamp scalar
307    pub fn timestamp(value: impl Into<String>) -> Self {
308        Self {
309            value: value.into(),
310            style: ScalarStyle::Plain,
311            scalar_type: ScalarType::Timestamp,
312        }
313    }
314
315    /// Create a regex scalar
316    pub fn regex(pattern: impl Into<String>) -> Self {
317        Self {
318            value: pattern.into(),
319            style: ScalarStyle::Plain,
320            scalar_type: ScalarType::Regex,
321        }
322    }
323
324    /// Get the raw value
325    pub fn value(&self) -> &str {
326        &self.value
327    }
328
329    /// Get the style
330    pub fn style(&self) -> ScalarStyle {
331        self.style
332    }
333
334    /// Get the scalar type
335    pub fn scalar_type(&self) -> ScalarType {
336        self.scalar_type
337    }
338
339    /// Try to parse this scalar as an `i64`.
340    ///
341    /// Returns `None` if the scalar type is not `Integer`.
342    pub fn to_i64(&self) -> Option<i64> {
343        if self.scalar_type == ScalarType::Integer {
344            Self::parse_integer(&self.value)
345        } else {
346            None
347        }
348    }
349
350    /// Try to parse this scalar as an `f64`.
351    ///
352    /// Returns `None` if the scalar type is not `Float`.
353    pub fn to_f64(&self) -> Option<f64> {
354        if self.scalar_type == ScalarType::Float {
355            self.value.trim().parse::<f64>().ok()
356        } else {
357            None
358        }
359    }
360
361    /// Try to parse this scalar as a `bool`.
362    ///
363    /// Returns `None` if the scalar type is not `Boolean`.
364    /// Recognizes: `true`, `false`, `yes`, `no`, `on`, `off` (case-insensitive).
365    pub fn to_bool(&self) -> Option<bool> {
366        if self.scalar_type == ScalarType::Boolean {
367            match self.value.to_lowercase().as_str() {
368                "true" | "yes" | "on" => Some(true),
369                "false" | "no" | "off" => Some(false),
370                _ => None,
371            }
372        } else {
373            None
374        }
375    }
376
377    /// Extract binary data if this is a binary scalar
378    #[cfg(feature = "base64")]
379    pub fn as_binary(&self) -> Option<Result<Vec<u8>, String>> {
380        match self.scalar_type {
381            ScalarType::Binary => Some(base64_decode(&self.value)),
382            _ => None,
383        }
384    }
385
386    /// Check if this is a binary scalar
387    #[cfg(feature = "base64")]
388    pub fn is_binary(&self) -> bool {
389        self.scalar_type == ScalarType::Binary
390    }
391
392    /// Check if this is a timestamp scalar
393    pub fn is_timestamp(&self) -> bool {
394        self.scalar_type == ScalarType::Timestamp
395    }
396
397    /// Check if this is a regex scalar
398    pub fn is_regex(&self) -> bool {
399        self.scalar_type == ScalarType::Regex
400    }
401
402    /// Compile and return a Regex object if this is a regex scalar
403    ///
404    /// This method is only available when the `regex` feature is enabled.
405    /// Returns None if this is not a regex scalar or if the pattern is invalid.
406    ///
407    /// # Example
408    /// ```
409    /// # #[cfg(feature = "regex")]
410    /// # {
411    /// use yaml_edit::ScalarValue;
412    ///
413    /// let scalar = ScalarValue::regex(r"\d{3}-\d{4}");
414    /// let regex = scalar.as_regex().unwrap();
415    /// assert!(regex.is_match("555-1234"));
416    /// # }
417    /// ```
418    #[cfg(feature = "regex")]
419    pub fn as_regex(&self) -> Option<regex::Regex> {
420        if self.scalar_type == ScalarType::Regex {
421            regex::Regex::new(&self.value).ok()
422        } else {
423            None
424        }
425    }
426
427    /// Try to compile this scalar as a regex, regardless of its type
428    ///
429    /// This method is only available when the `regex` feature is enabled.
430    /// This will attempt to compile the scalar value as a regex pattern,
431    /// even if it's not marked with the !!regex tag.
432    ///
433    /// # Example
434    /// ```
435    /// # #[cfg(feature = "regex")]
436    /// # {
437    /// use yaml_edit::ScalarValue;
438    ///
439    /// let scalar = ScalarValue::string(r"\d+");  // Plain string scalar
440    /// let regex = scalar.try_as_regex().unwrap();
441    /// assert!(regex.is_match("123"));
442    /// # }
443    /// ```
444    #[cfg(feature = "regex")]
445    pub fn try_as_regex(&self) -> Result<regex::Regex, regex::Error> {
446        regex::Regex::new(&self.value)
447    }
448
449    /// Try to coerce this scalar to the specified type
450    pub fn coerce_to_type(&self, target_type: ScalarType) -> Option<ScalarValue> {
451        if self.scalar_type == target_type {
452            return Some(self.clone());
453        }
454
455        match target_type {
456            ScalarType::String => Some(ScalarValue {
457                value: self.value.clone(),
458                style: ScalarStyle::Plain,
459                scalar_type: ScalarType::String,
460            }),
461            ScalarType::Integer => Self::parse_integer(&self.value).map(ScalarValue::from),
462            ScalarType::Float => self.value.parse::<f64>().ok().map(ScalarValue::from),
463            ScalarType::Boolean => match self.value.to_lowercase().as_str() {
464                "true" | "yes" | "on" | "1" => Some(ScalarValue::from(true)),
465                "false" | "no" | "off" | "0" => Some(ScalarValue::from(false)),
466                _ => None,
467            },
468            ScalarType::Null => match self.value.to_lowercase().as_str() {
469                "null" | "~" | "" => Some(ScalarValue::null()),
470                _ => None,
471            },
472            #[cfg(feature = "base64")]
473            ScalarType::Binary => {
474                // Try to decode as base64 to verify it's valid binary data
475                if base64_decode(&self.value).is_ok() {
476                    Some(ScalarValue {
477                        value: self.value.clone(),
478                        style: ScalarStyle::Plain,
479                        scalar_type: ScalarType::Binary,
480                    })
481                } else {
482                    None
483                }
484            }
485            ScalarType::Timestamp => {
486                // Basic timestamp format validation
487                if self.is_valid_timestamp(&self.value) {
488                    Some(ScalarValue::timestamp(&self.value))
489                } else {
490                    None
491                }
492            }
493            ScalarType::Regex => {
494                // For regex, just convert the value
495                Some(ScalarValue::regex(&self.value))
496            }
497        }
498    }
499
500    /// Parse an integer with support for various formats
501    /// Supports: decimal, hexadecimal (0x), binary (0b), octal (0o and legacy 0)
502    pub(crate) fn parse_integer(value: &str) -> Option<i64> {
503        let value = value.trim();
504
505        // Handle negative numbers
506        let (is_negative, value) = if let Some(stripped) = value.strip_prefix('-') {
507            (true, stripped)
508        } else if let Some(stripped) = value.strip_prefix('+') {
509            (false, stripped)
510        } else {
511            (false, value)
512        };
513
514        let parsed = if let Some(hex_part) = value
515            .strip_prefix("0x")
516            .or_else(|| value.strip_prefix("0X"))
517        {
518            // Hexadecimal
519            i64::from_str_radix(hex_part, 16).ok()
520        } else if let Some(bin_part) = value
521            .strip_prefix("0b")
522            .or_else(|| value.strip_prefix("0B"))
523        {
524            // Binary
525            i64::from_str_radix(bin_part, 2).ok()
526        } else if let Some(oct_part) = value
527            .strip_prefix("0o")
528            .or_else(|| value.strip_prefix("0O"))
529        {
530            // Modern octal
531            i64::from_str_radix(oct_part, 8).ok()
532        } else if value.starts_with('0')
533            && value.len() > 1
534            && value.chars().all(|c| c.is_ascii_digit())
535        {
536            // Legacy octal (starts with 0 but not 0x, 0b, 0o)
537            i64::from_str_radix(value, 8).ok()
538        } else {
539            // Decimal
540            value.parse::<i64>().ok()
541        };
542
543        parsed.map(|n| if is_negative { -n } else { n })
544    }
545
546    /// Auto-detect the most appropriate scalar type from a string value
547    pub fn auto_detect_type(value: &str) -> ScalarType {
548        // Check for null values first
549        match value.to_lowercase().as_str() {
550            "null" | "~" | "" => return ScalarType::Null,
551            _ => {}
552        }
553
554        // Check for boolean values
555        match value.to_lowercase().as_str() {
556            "true" | "false" | "yes" | "no" | "on" | "off" => return ScalarType::Boolean,
557            _ => {}
558        }
559
560        // Check for numbers with various formats
561        if Self::parse_integer(value).is_some() {
562            return ScalarType::Integer;
563        }
564        if value.parse::<f64>().is_ok() {
565            return ScalarType::Float;
566        }
567
568        // Check for timestamps (basic patterns)
569        if Self::is_valid_timestamp_static(value) {
570            return ScalarType::Timestamp;
571        }
572
573        // Check for binary data (base64)
574        #[cfg(feature = "base64")]
575        if Self::looks_like_base64(value) && base64_decode(value).is_ok() {
576            return ScalarType::Binary;
577        }
578
579        // Default to string
580        ScalarType::String
581    }
582
583    /// Parse a YAML scalar value with automatic type detection
584    ///
585    /// This method automatically detects the YAML type based on the content:
586    /// - "123" β†’ Integer
587    /// - "3.14" β†’ Float
588    /// - "true" / "false" β†’ Boolean
589    /// - "null" / "~" β†’ Null
590    /// - etc.
591    ///
592    /// # Examples
593    ///
594    /// ```
595    /// use yaml_edit::{ScalarValue, ScalarType};
596    ///
597    /// let scalar = ScalarValue::parse("123");
598    /// assert_eq!(scalar.scalar_type(), ScalarType::Integer);
599    /// assert_eq!(scalar.to_yaml_string(), "123");
600    ///
601    /// let scalar = ScalarValue::parse("true");
602    /// assert_eq!(scalar.scalar_type(), ScalarType::Boolean);
603    /// assert_eq!(scalar.to_yaml_string(), "true");
604    ///
605    /// let scalar = ScalarValue::parse("hello");
606    /// assert_eq!(scalar.scalar_type(), ScalarType::String);
607    /// assert_eq!(scalar.to_yaml_string(), "hello");
608    /// ```
609    ///
610    /// To create a String-type scalar without auto-detection (e.g., to represent
611    /// the string "123" rather than the integer 123), use [`ScalarValue::string()`] instead.
612    pub fn parse(value: impl Into<String>) -> Self {
613        let value = value.into();
614        let scalar_type = Self::auto_detect_type(&value);
615        // For non-string types, use Plain style (no quotes)
616        // For string types, detect appropriate style
617        let style = match scalar_type {
618            ScalarType::String => Self::detect_style(&value),
619            // All other types use plain style
620            _ => ScalarStyle::Plain,
621        };
622
623        Self {
624            value,
625            style,
626            scalar_type,
627        }
628    }
629
630    /// Create a ScalarValue from a Scalar syntax node, preserving the type from the lexer
631    ///
632    /// This extracts type information directly from the token kind (INT, BOOL, FLOAT, etc.)
633    /// rather than guessing based on heuristics. This is the correct way to convert
634    /// parsed YAML into ScalarValue.
635    pub fn from_scalar(scalar: &crate::yaml::Scalar) -> Self {
636        use crate::lex::SyntaxKind;
637        use rowan::ast::AstNode;
638
639        let value = scalar.as_string();
640
641        // Get the token kind from the first token in the scalar
642        let syntax_node = scalar.syntax();
643        let scalar_type = if let Some(token) = syntax_node.first_token() {
644            match token.kind() {
645                SyntaxKind::INT => ScalarType::Integer,
646                SyntaxKind::FLOAT => ScalarType::Float,
647                SyntaxKind::BOOL => ScalarType::Boolean,
648                SyntaxKind::NULL => ScalarType::Null,
649                SyntaxKind::STRING => ScalarType::String,
650                _ => ScalarType::String, // fallback
651            }
652        } else {
653            ScalarType::String
654        };
655
656        // Determine style based on the actual text (with quotes if present)
657        let raw_text = scalar.value();
658        let style = if raw_text.starts_with('"') && raw_text.ends_with('"') {
659            ScalarStyle::DoubleQuoted
660        } else if raw_text.starts_with('\'') && raw_text.ends_with('\'') {
661            ScalarStyle::SingleQuoted
662        } else {
663            ScalarStyle::Plain
664        };
665
666        Self {
667            value,
668            style,
669            scalar_type,
670        }
671    }
672
673    /// Check if a string looks like base64 encoded data
674    #[cfg(feature = "base64")]
675    fn looks_like_base64(value: &str) -> bool {
676        if value.is_empty() {
677            return false;
678        }
679
680        // Must be reasonable length and contain only base64 characters
681        // Also need to check that padding is only at the end
682        if value.len() < 4 || value.len() % 4 != 0 {
683            return false;
684        }
685
686        let padding_count = value.chars().filter(|&c| c == '=').count();
687        if padding_count > 2 {
688            return false;
689        }
690
691        // Check all characters are valid base64
692        if !value
693            .chars()
694            .all(|c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '+' | '/' | '='))
695        {
696            return false;
697        }
698
699        // Check that padding is only at the end
700        if padding_count > 0 {
701            let padding_start = value.len() - padding_count;
702            if !value[padding_start..].chars().all(|c| c == '=') {
703                return false;
704            }
705            // Check that non-padding part doesn't contain '='
706            if value[..padding_start].contains('=') {
707                return false;
708            }
709        }
710
711        // Final validation: try to decode it to ensure it's actually valid base64
712        // This will catch cases like "SGVs" which looks valid but isn't proper base64
713        base64_decode(value).is_ok()
714    }
715
716    /// Basic timestamp format validation
717    fn is_valid_timestamp(&self, value: &str) -> bool {
718        Self::is_valid_timestamp_static(value)
719    }
720
721    /// Static version of timestamp validation
722    fn is_valid_timestamp_static(value: &str) -> bool {
723        // Basic patterns for common timestamp formats
724        // ISO 8601: YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS etc.
725        if Self::matches_iso8601_pattern(value) {
726            return true;
727        }
728
729        // Unix timestamp (seconds since epoch)
730        if let Ok(timestamp) = value.parse::<u64>() {
731            // Reasonable range: between 1970 and 2100
732            return timestamp > 0 && timestamp < 4_102_444_800; // 2100-01-01
733        }
734
735        false
736    }
737
738    /// Simple pattern matching for ISO 8601 timestamps
739    fn matches_iso8601_pattern(value: &str) -> bool {
740        let chars: Vec<char> = value.chars().collect();
741
742        // Must be at least YYYY-MM-DD (10 chars)
743        if chars.len() < 10 {
744            return false;
745        }
746
747        // Check YYYY-MM-DD pattern
748        if !(chars[0..4].iter().all(|c| c.is_ascii_digit())
749            && chars[4] == '-'
750            && chars[5..7].iter().all(|c| c.is_ascii_digit())
751            && chars[7] == '-'
752            && chars[8..10].iter().all(|c| c.is_ascii_digit()))
753        {
754            return false;
755        }
756
757        // Validate month and day ranges (basic validation)
758        let month_str: String = chars[5..7].iter().collect();
759        let day_str: String = chars[8..10].iter().collect();
760
761        if let (Ok(month), Ok(day)) = (month_str.parse::<u8>(), day_str.parse::<u8>()) {
762            if !(1..=12).contains(&month) || !(1..=31).contains(&day) {
763                return false;
764            }
765        } else {
766            return false;
767        }
768
769        // If it's just YYYY-MM-DD, that's valid
770        if chars.len() == 10 {
771            return true;
772        }
773
774        // Check for time part: T or t or space followed by HH:MM:SS
775        if chars.len() >= 19 {
776            let sep = chars[10];
777            if (sep == 'T' || sep == 't' || sep == ' ')
778                && chars[11..13].iter().all(|c| c.is_ascii_digit())
779                && chars[13] == ':'
780                && chars[14..16].iter().all(|c| c.is_ascii_digit())
781                && chars[16] == ':'
782                && chars[17..19].iter().all(|c| c.is_ascii_digit())
783            {
784                // Validate hour, minute, second ranges
785                let hour_str: String = chars[11..13].iter().collect();
786                let minute_str: String = chars[14..16].iter().collect();
787                let second_str: String = chars[17..19].iter().collect();
788
789                if let (Ok(hour), Ok(minute), Ok(second)) = (
790                    hour_str.parse::<u8>(),
791                    minute_str.parse::<u8>(),
792                    second_str.parse::<u8>(),
793                ) {
794                    if hour > 23 || minute > 59 || second > 59 {
795                        return false;
796                    }
797                } else {
798                    return false;
799                }
800
801                return true;
802            }
803        }
804
805        false
806    }
807
808    /// Detect the appropriate style for a value
809    fn detect_style(value: &str) -> ScalarStyle {
810        // Check if value needs quoting
811        if Self::needs_quoting(value) {
812            // Prefer single quotes if no single quotes in value
813            if !value.contains('\'') {
814                ScalarStyle::SingleQuoted
815            } else {
816                ScalarStyle::DoubleQuoted
817            }
818        } else if value.contains('\n') {
819            // Multi-line strings use literal style
820            ScalarStyle::Literal
821        } else {
822            ScalarStyle::Plain
823        }
824    }
825
826    /// Check if a value needs quoting when treated as a string
827    fn needs_quoting(value: &str) -> bool {
828        // Empty string needs quotes
829        if value.is_empty() {
830            return true;
831        }
832
833        // Check for YAML keywords that would be misinterpreted
834        // These need quotes when we want them as strings
835        if value.eq_ignore_ascii_case("true")
836            || value.eq_ignore_ascii_case("false")
837            || value.eq_ignore_ascii_case("yes")
838            || value.eq_ignore_ascii_case("no")
839            || value.eq_ignore_ascii_case("on")
840            || value.eq_ignore_ascii_case("off")
841            || value.eq_ignore_ascii_case("null")
842            || value == "~"
843        {
844            return true;
845        }
846
847        // Also quote things that look like numbers to preserve them as strings
848        if value.parse::<f64>().is_ok() || Self::parse_integer(value).is_some() {
849            return true;
850        }
851
852        // Check if starts with special characters
853        if value.starts_with(|ch: char| {
854            matches!(ch, '-' | '?' | '[' | ']' | '{' | '}' | ',' | '>' | '<')
855        }) {
856            return true;
857        }
858
859        // Check for special characters that require quoting
860        // : and # need context-aware checking (only ambiguous before whitespace or at end)
861        let mut chars = value.chars().peekable();
862        while let Some(ch) = chars.next() {
863            match ch {
864                '&' | '*' | '!' | '|' | '\'' | '"' | '%' => return true,
865                ':' | '#' => {
866                    if chars.peek().map_or(true, |next| next.is_whitespace()) {
867                        return true;
868                    }
869                }
870                _ => {}
871            }
872        }
873
874        // Leading/trailing whitespace needs quotes
875        if value != value.trim() {
876            return true;
877        }
878
879        false
880    }
881
882    /// Render the scalar as a YAML string with proper escaping
883    pub fn to_yaml_string(&self) -> String {
884        // For special data types, always include the tag regardless of style
885        let tag_prefix = match self.scalar_type {
886            #[cfg(feature = "base64")]
887            ScalarType::Binary => "!!binary ",
888            ScalarType::Timestamp => "!!timestamp ",
889            ScalarType::Regex => "!!regex ",
890            _ => "",
891        };
892
893        let content = match self.style {
894            ScalarStyle::Plain => {
895                // Check if we need to quote based on type vs content
896                match self.scalar_type {
897                    ScalarType::String => {
898                        // For strings, quote if the content looks like a special value
899                        if Self::needs_quoting(&self.value) {
900                            self.to_single_quoted()
901                        } else {
902                            self.value.clone()
903                        }
904                    }
905                    // For non-strings, output as plain (unquoted)
906                    ScalarType::Integer
907                    | ScalarType::Float
908                    | ScalarType::Boolean
909                    | ScalarType::Null
910                    | ScalarType::Timestamp
911                    | ScalarType::Regex => self.value.clone(),
912                    #[cfg(feature = "base64")]
913                    ScalarType::Binary => self.value.clone(),
914                }
915            }
916            ScalarStyle::SingleQuoted => self.to_single_quoted(),
917            ScalarStyle::DoubleQuoted => self.to_double_quoted(),
918            ScalarStyle::Literal => self.to_literal(),
919            ScalarStyle::Folded => self.to_folded(),
920        };
921
922        format!("{}{}", tag_prefix, content)
923    }
924
925    /// Convert to single-quoted string
926    fn to_single_quoted(&self) -> String {
927        // Escape single quotes by doubling them
928        let escaped = self.value.replace('\'', "''");
929        format!("'{}'", escaped)
930    }
931
932    /// Convert to double-quoted string
933    fn to_double_quoted(&self) -> String {
934        let mut result = String::from("\"");
935        for ch in self.value.chars() {
936            match ch {
937                '"' => result.push_str("\\\""),
938                '\\' => result.push_str("\\\\"),
939                '\n' => result.push_str("\\n"),
940                '\r' => result.push_str("\\r"),
941                '\t' => result.push_str("\\t"),
942                '\x08' => result.push_str("\\b"),
943                '\x0C' => result.push_str("\\f"),
944                '\x07' => result.push_str("\\a"), // bell
945                '\x1B' => result.push_str("\\e"), // escape
946                '\x0B' => result.push_str("\\v"), // vertical tab
947                '\0' => result.push_str("\\0"),   // null
948                c if c.is_control() || (c as u32) > 0x7F => {
949                    // Handle Unicode characters and control characters
950                    let code_point = c as u32;
951                    if code_point <= 0xFF {
952                        result.push_str(&format!("\\x{:02X}", code_point));
953                    } else if code_point <= 0xFFFF {
954                        result.push_str(&format!("\\u{:04X}", code_point));
955                    } else {
956                        result.push_str(&format!("\\U{:08X}", code_point));
957                    }
958                }
959                c => result.push(c),
960            }
961        }
962        result.push('"');
963        result
964    }
965
966    /// Convert to literal block scalar
967    fn to_literal(&self) -> String {
968        self.to_literal_with_indent(2)
969    }
970
971    /// Convert to folded block scalar
972    fn to_folded(&self) -> String {
973        self.to_folded_with_indent(2)
974    }
975
976    /// Convert to literal block scalar with specific indentation
977    pub fn to_literal_with_indent(&self, indent: usize) -> String {
978        let indent_str = " ".repeat(indent);
979
980        // Detect the existing indentation of the content
981        let existing_indent = self.detect_content_indentation();
982
983        // If content already has consistent indentation, preserve it
984        if existing_indent.is_some() {
985            format!("|\n{}", self.value)
986        } else {
987            // Add consistent indentation
988            let indented = self
989                .value
990                .lines()
991                .map(|line| {
992                    if line.trim().is_empty() {
993                        String::new()
994                    } else {
995                        format!("{}{}", indent_str, line)
996                    }
997                })
998                .collect::<Vec<_>>()
999                .join("\n");
1000            format!("|\n{}", indented)
1001        }
1002    }
1003
1004    /// Convert to folded block scalar with specific indentation
1005    pub fn to_folded_with_indent(&self, indent: usize) -> String {
1006        let indent_str = " ".repeat(indent);
1007
1008        // Detect the existing indentation of the content
1009        let existing_indent = self.detect_content_indentation();
1010
1011        // If content already has consistent indentation, preserve it
1012        if existing_indent.is_some() {
1013            format!(">\n{}", self.value)
1014        } else {
1015            // Add consistent indentation
1016            let indented = self
1017                .value
1018                .lines()
1019                .map(|line| {
1020                    if line.trim().is_empty() {
1021                        String::new()
1022                    } else {
1023                        format!("{}{}", indent_str, line)
1024                    }
1025                })
1026                .collect::<Vec<_>>()
1027                .join("\n");
1028            format!(">\n{}", indented)
1029        }
1030    }
1031
1032    /// Detect the minimum indentation level of non-empty lines in the content
1033    fn detect_content_indentation(&self) -> Option<usize> {
1034        let non_empty_lines: Vec<&str> = self
1035            .value
1036            .lines()
1037            .filter(|line| !line.trim().is_empty())
1038            .collect();
1039
1040        if non_empty_lines.is_empty() {
1041            return None;
1042        }
1043
1044        let mut min_indent = None;
1045        let mut all_have_same_indent = true;
1046
1047        for line in non_empty_lines {
1048            let indent = line.len() - line.trim_start().len();
1049            match min_indent {
1050                None => min_indent = Some(indent),
1051                Some(current_min) => {
1052                    if indent != current_min {
1053                        all_have_same_indent = false;
1054                    }
1055                    min_indent = Some(current_min.min(indent));
1056                }
1057            }
1058        }
1059
1060        // Only preserve indentation if all lines have some consistent structure
1061        if all_have_same_indent && min_indent.unwrap_or(0) > 0 {
1062            min_indent
1063        } else {
1064            None
1065        }
1066    }
1067}
1068
1069impl fmt::Display for ScalarValue {
1070    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1071        write!(f, "{}", self.to_yaml_string())
1072    }
1073}
1074
1075impl From<String> for ScalarValue {
1076    fn from(value: String) -> Self {
1077        Self::string(value)
1078    }
1079}
1080
1081impl From<&str> for ScalarValue {
1082    fn from(value: &str) -> Self {
1083        Self::string(value)
1084    }
1085}
1086
1087impl From<i32> for ScalarValue {
1088    fn from(value: i32) -> Self {
1089        Self {
1090            value: value.to_string(),
1091            style: ScalarStyle::Plain,
1092            scalar_type: ScalarType::Integer,
1093        }
1094    }
1095}
1096
1097impl From<i64> for ScalarValue {
1098    fn from(value: i64) -> Self {
1099        Self {
1100            value: value.to_string(),
1101            style: ScalarStyle::Plain,
1102            scalar_type: ScalarType::Integer,
1103        }
1104    }
1105}
1106
1107impl From<f32> for ScalarValue {
1108    fn from(value: f32) -> Self {
1109        Self {
1110            value: value.to_string(),
1111            style: ScalarStyle::Plain,
1112            scalar_type: ScalarType::Float,
1113        }
1114    }
1115}
1116
1117impl From<f64> for ScalarValue {
1118    fn from(value: f64) -> Self {
1119        Self {
1120            value: value.to_string(),
1121            style: ScalarStyle::Plain,
1122            scalar_type: ScalarType::Float,
1123        }
1124    }
1125}
1126
1127impl From<bool> for ScalarValue {
1128    fn from(value: bool) -> Self {
1129        Self {
1130            value: if value { "true" } else { "false" }.to_string(),
1131            style: ScalarStyle::Plain,
1132            scalar_type: ScalarType::Boolean,
1133        }
1134    }
1135}
1136
1137impl From<crate::yaml::Scalar> for ScalarValue {
1138    fn from(scalar: crate::yaml::Scalar) -> Self {
1139        let value = scalar.as_string();
1140        ScalarValue::parse(&value)
1141    }
1142}
1143
1144impl crate::AsYaml for ScalarValue {
1145    fn as_node(&self) -> Option<&crate::yaml::SyntaxNode> {
1146        None
1147    }
1148
1149    fn kind(&self) -> crate::as_yaml::YamlKind {
1150        crate::as_yaml::YamlKind::Scalar
1151    }
1152
1153    fn build_content(
1154        &self,
1155        builder: &mut rowan::GreenNodeBuilder,
1156        _indent: usize,
1157        _flow_context: bool,
1158    ) -> bool {
1159        use crate::lex::SyntaxKind;
1160        let token_kind = match self.scalar_type() {
1161            ScalarType::Integer => SyntaxKind::INT,
1162            ScalarType::Float => SyntaxKind::FLOAT,
1163            ScalarType::Boolean => SyntaxKind::BOOL,
1164            ScalarType::Null => SyntaxKind::NULL,
1165            _ => SyntaxKind::STRING,
1166        };
1167        builder.start_node(SyntaxKind::SCALAR.into());
1168        builder.token(token_kind.into(), self.value());
1169        builder.finish_node();
1170        false
1171    }
1172
1173    fn is_inline(&self) -> bool {
1174        true
1175    }
1176}
1177
1178#[cfg(test)]
1179mod tests {
1180    use super::*;
1181
1182    #[test]
1183    fn test_plain_scalars() {
1184        let scalar = ScalarValue::string("simple");
1185        assert_eq!(scalar.to_yaml_string(), "simple");
1186
1187        let scalar = ScalarValue::string("hello world");
1188        assert_eq!(scalar.to_yaml_string(), "hello world");
1189    }
1190
1191    #[test]
1192    fn test_values_needing_quotes() {
1193        // Boolean-like values
1194        let scalar = ScalarValue::string("true");
1195        assert_eq!(scalar.to_yaml_string(), "'true'");
1196
1197        let scalar = ScalarValue::string("false");
1198        assert_eq!(scalar.to_yaml_string(), "'false'");
1199
1200        let scalar = ScalarValue::string("yes");
1201        assert_eq!(scalar.to_yaml_string(), "'yes'");
1202
1203        let scalar = ScalarValue::string("no");
1204        assert_eq!(scalar.to_yaml_string(), "'no'");
1205
1206        // Null-like values
1207        let scalar = ScalarValue::string("null");
1208        assert_eq!(scalar.to_yaml_string(), "'null'");
1209
1210        let scalar = ScalarValue::string("~");
1211        assert_eq!(scalar.to_yaml_string(), "'~'");
1212
1213        // Numbers
1214        let scalar = ScalarValue::string("123");
1215        assert_eq!(scalar.to_yaml_string(), "'123'");
1216
1217        let scalar = ScalarValue::string("3.14");
1218        assert_eq!(scalar.to_yaml_string(), "'3.14'");
1219
1220        // Special characters
1221        let scalar = ScalarValue::string("value: something");
1222        assert_eq!(scalar.to_yaml_string(), "'value: something'");
1223
1224        let scalar = ScalarValue::string("# comment");
1225        assert_eq!(scalar.to_yaml_string(), "'# comment'");
1226
1227        // Leading/trailing whitespace
1228        let scalar = ScalarValue::string("  spaces  ");
1229        assert_eq!(scalar.to_yaml_string(), "'  spaces  '");
1230    }
1231
1232    #[test]
1233    fn test_single_quoted() {
1234        let scalar = ScalarValue::single_quoted("value with 'quotes'");
1235        assert_eq!(scalar.to_yaml_string(), "'value with ''quotes'''");
1236    }
1237
1238    #[test]
1239    fn test_double_quoted() {
1240        let scalar = ScalarValue::double_quoted("value with \"quotes\" and \\backslash");
1241        assert_eq!(
1242            scalar.to_yaml_string(),
1243            "\"value with \\\"quotes\\\" and \\\\backslash\""
1244        );
1245
1246        let scalar = ScalarValue::double_quoted("line1\nline2\ttab");
1247        assert_eq!(scalar.to_yaml_string(), "\"line1\\nline2\\ttab\"");
1248    }
1249
1250    #[test]
1251    fn test_multiline() {
1252        let scalar = ScalarValue::string("line1\nline2\nline3");
1253        // Should auto-detect literal style for multiline
1254        assert_eq!(scalar.style(), ScalarStyle::Literal);
1255    }
1256
1257    #[test]
1258    fn test_from_types() {
1259        let scalar = ScalarValue::from(42);
1260        assert_eq!(scalar.to_yaml_string(), "42");
1261
1262        let scalar = ScalarValue::from(1.234);
1263        assert_eq!(scalar.to_yaml_string(), "1.234");
1264
1265        let scalar = ScalarValue::from(true);
1266        assert_eq!(scalar.to_yaml_string(), "true");
1267
1268        let scalar = ScalarValue::from(false);
1269        assert_eq!(scalar.to_yaml_string(), "false");
1270    }
1271
1272    #[test]
1273    fn test_empty_string() {
1274        let scalar = ScalarValue::string("");
1275        assert_eq!(scalar.to_yaml_string(), "''");
1276    }
1277
1278    #[test]
1279    fn test_special_start_chars() {
1280        let scalar = ScalarValue::string("-item");
1281        assert_eq!(scalar.to_yaml_string(), "'-item'");
1282
1283        let scalar = ScalarValue::string("?key");
1284        assert_eq!(scalar.to_yaml_string(), "'?key'");
1285
1286        let scalar = ScalarValue::string("[array]");
1287        assert_eq!(scalar.to_yaml_string(), "'[array]'");
1288    }
1289
1290    #[test]
1291    fn test_null_scalar() {
1292        let scalar = ScalarValue::null();
1293        assert_eq!(scalar.to_yaml_string(), "null");
1294        assert_eq!(scalar.scalar_type, ScalarType::Null);
1295    }
1296
1297    #[test]
1298    fn test_escape_sequences_basic() {
1299        // Test basic escape sequences
1300        assert_eq!(
1301            ScalarValue::parse_escape_sequences("hello\\nworld"),
1302            "hello\nworld"
1303        );
1304        assert_eq!(
1305            ScalarValue::parse_escape_sequences("tab\\there"),
1306            "tab\there"
1307        );
1308        assert_eq!(
1309            ScalarValue::parse_escape_sequences("quote\\\"test"),
1310            "quote\"test"
1311        );
1312        assert_eq!(
1313            ScalarValue::parse_escape_sequences("back\\\\slash"),
1314            "back\\slash"
1315        );
1316        assert_eq!(
1317            ScalarValue::parse_escape_sequences("return\\rtest"),
1318            "return\rtest"
1319        );
1320    }
1321
1322    #[test]
1323    fn test_escape_sequences_control_chars() {
1324        // Test control character escapes
1325        assert_eq!(ScalarValue::parse_escape_sequences("bell\\a"), "bell\x07");
1326        assert_eq!(
1327            ScalarValue::parse_escape_sequences("backspace\\b"),
1328            "backspace\x08"
1329        );
1330        assert_eq!(
1331            ScalarValue::parse_escape_sequences("formfeed\\f"),
1332            "formfeed\x0C"
1333        );
1334        assert_eq!(
1335            ScalarValue::parse_escape_sequences("escape\\e"),
1336            "escape\x1B"
1337        );
1338        assert_eq!(ScalarValue::parse_escape_sequences("vtab\\v"), "vtab\x0B");
1339        assert_eq!(ScalarValue::parse_escape_sequences("null\\0"), "null\0");
1340        assert_eq!(ScalarValue::parse_escape_sequences("slash\\/"), "slash/");
1341    }
1342
1343    #[test]
1344    fn test_escape_sequences_unicode_x() {
1345        // Test \xNN escape sequences
1346        assert_eq!(ScalarValue::parse_escape_sequences("\\x41"), "A"); // 0x41 = 'A'
1347        assert_eq!(ScalarValue::parse_escape_sequences("\\x7A"), "z"); // 0x7A = 'z'
1348        assert_eq!(ScalarValue::parse_escape_sequences("\\x20"), " "); // 0x20 = space
1349        assert_eq!(ScalarValue::parse_escape_sequences("\\xFF"), "\u{FF}"); // 0xFF = ΓΏ
1350
1351        // Test invalid hex sequences
1352        assert_eq!(ScalarValue::parse_escape_sequences("\\xGH"), "\\xGH"); // Invalid hex
1353        assert_eq!(ScalarValue::parse_escape_sequences("\\x4"), "\\x4"); // Incomplete
1354    }
1355
1356    #[test]
1357    fn test_escape_sequences_unicode_u() {
1358        // Test \uNNNN escape sequences
1359        assert_eq!(ScalarValue::parse_escape_sequences("\\u0041"), "A"); // 0x0041 = 'A'
1360        assert_eq!(ScalarValue::parse_escape_sequences("\\u03B1"), "Ξ±"); // Greek alpha
1361        assert_eq!(ScalarValue::parse_escape_sequences("\\u2603"), "β˜ƒ"); // Snowman
1362        assert_eq!(ScalarValue::parse_escape_sequences("\\u4E2D"), "δΈ­"); // Chinese character
1363
1364        // Test invalid sequences
1365        assert_eq!(ScalarValue::parse_escape_sequences("\\uGHIJ"), "\\uGHIJ"); // Invalid hex
1366        assert_eq!(ScalarValue::parse_escape_sequences("\\u041"), "\\u041"); // Incomplete
1367    }
1368
1369    #[test]
1370    fn test_escape_sequences_unicode_capital_u() {
1371        // Test \UNNNNNNNN escape sequences
1372        assert_eq!(ScalarValue::parse_escape_sequences("\\U00000041"), "A"); // 0x00000041 = 'A'
1373        assert_eq!(ScalarValue::parse_escape_sequences("\\U0001F603"), "πŸ˜ƒ"); // Smiley emoji
1374        assert_eq!(ScalarValue::parse_escape_sequences("\\U0001F4A9"), "πŸ’©"); // Pile of poo emoji
1375
1376        // Test invalid sequences
1377        assert_eq!(
1378            ScalarValue::parse_escape_sequences("\\UGHIJKLMN"),
1379            "\\UGHIJKLMN"
1380        ); // Invalid hex
1381        assert_eq!(
1382            ScalarValue::parse_escape_sequences("\\U0000004"),
1383            "\\U0000004"
1384        ); // Incomplete
1385        assert_eq!(
1386            ScalarValue::parse_escape_sequences("\\UFFFFFFFF"),
1387            "\\UFFFFFFFF"
1388        ); // Invalid code point
1389    }
1390
1391    #[test]
1392    fn test_escape_sequences_line_folding() {
1393        // Test line folding with escaped spaces and newlines
1394        assert_eq!(
1395            ScalarValue::parse_escape_sequences("line\\ \nfolding"),
1396            "linefolding"
1397        );
1398        assert_eq!(
1399            ScalarValue::parse_escape_sequences("escaped\\nline\\nbreak"),
1400            "escaped\nline\nbreak"
1401        );
1402        assert_eq!(
1403            ScalarValue::parse_escape_sequences("remove\\\nline\\nbreak"),
1404            "removeline\nbreak"
1405        );
1406    }
1407
1408    #[test]
1409    fn test_escape_sequences_mixed() {
1410        // Test mixed escape sequences
1411        let input = "Hello\\nWorld\\u0021\\x20\\U0001F44D";
1412        let expected = "Hello\nWorld! πŸ‘";
1413        assert_eq!(ScalarValue::parse_escape_sequences(input), expected);
1414
1415        // Test with quotes and backslashes
1416        let input = "Quote\\\"back\\\\slash\\ttab";
1417        let expected = "Quote\"back\\slash\ttab";
1418        assert_eq!(ScalarValue::parse_escape_sequences(input), expected);
1419    }
1420
1421    #[test]
1422    fn test_escape_sequences_unknown() {
1423        // Test unknown escape sequences are preserved
1424        assert_eq!(ScalarValue::parse_escape_sequences("\\q"), "\\q");
1425        assert_eq!(ScalarValue::parse_escape_sequences("\\z"), "\\z");
1426        assert_eq!(ScalarValue::parse_escape_sequences("\\1"), "\\1");
1427    }
1428
1429    #[test]
1430    fn test_indentation_preservation() {
1431        // Test preserving exact indentation in block scalars
1432        let content_with_indent = "  Line 1\n    Line 2 more indented\n  Line 3";
1433        let scalar = ScalarValue::literal(content_with_indent);
1434
1435        // Should detect that content already has indentation and preserve it
1436        let yaml_output = scalar.to_literal_with_indent(2);
1437        assert_eq!(
1438            yaml_output,
1439            "|\n    Line 1\n      Line 2 more indented\n    Line 3"
1440        );
1441    }
1442
1443    #[test]
1444    fn test_indentation_detection() {
1445        // Test content with consistent indentation
1446        let consistent_content = "  Line 1\n  Line 2\n  Line 3";
1447        let scalar1 = ScalarValue::literal(consistent_content);
1448        assert_eq!(scalar1.detect_content_indentation(), Some(2));
1449
1450        // Test content with no indentation
1451        let no_indent_content = "Line 1\nLine 2\nLine 3";
1452        let scalar2 = ScalarValue::literal(no_indent_content);
1453        assert_eq!(scalar2.detect_content_indentation(), None);
1454
1455        // Test content with inconsistent indentation
1456        let inconsistent_content = "  Line 1\n    Line 2\n Line 3";
1457        let scalar3 = ScalarValue::literal(inconsistent_content);
1458        assert_eq!(scalar3.detect_content_indentation(), None);
1459
1460        // Test empty content
1461        let empty_content = "";
1462        let scalar4 = ScalarValue::literal(empty_content);
1463        assert_eq!(scalar4.detect_content_indentation(), None);
1464
1465        // Test content with only whitespace lines
1466        let whitespace_content = "  Line 1\n\n  Line 3";
1467        let scalar5 = ScalarValue::literal(whitespace_content);
1468        assert_eq!(scalar5.detect_content_indentation(), Some(2));
1469    }
1470
1471    #[test]
1472    fn test_literal_with_custom_indent() {
1473        // Test applying custom indentation to unindented content
1474        let content = "Line 1\nLine 2\nLine 3";
1475        let scalar = ScalarValue::literal(content);
1476
1477        let yaml_4_spaces = scalar.to_literal_with_indent(4);
1478        assert_eq!(yaml_4_spaces, "|\n    Line 1\n    Line 2\n    Line 3");
1479
1480        let yaml_1_space = scalar.to_literal_with_indent(1);
1481        assert_eq!(yaml_1_space, "|\n Line 1\n Line 2\n Line 3");
1482    }
1483
1484    #[test]
1485    fn test_folded_with_custom_indent() {
1486        // Test applying custom indentation to folded scalars
1487        let content = "Line 1\nLine 2\nLine 3";
1488        let scalar = ScalarValue::folded(content);
1489
1490        let yaml_3_spaces = scalar.to_folded_with_indent(3);
1491        assert_eq!(yaml_3_spaces, ">\n   Line 1\n   Line 2\n   Line 3");
1492    }
1493
1494    #[test]
1495    fn test_mixed_empty_lines_preservation() {
1496        // Test handling of empty lines in block scalars
1497        let content_with_empty_lines = "Line 1\n\nLine 3\n\n\nLine 6";
1498        let scalar = ScalarValue::literal(content_with_empty_lines);
1499
1500        let yaml_output = scalar.to_literal_with_indent(2);
1501        assert_eq!(yaml_output, "|\n  Line 1\n\n  Line 3\n\n\n  Line 6");
1502
1503        // Empty lines should remain empty (no indentation added)
1504        // Input has 3 empty lines; they should appear unchanged in the output
1505        let lines: Vec<&str> = yaml_output.lines().collect();
1506        let empty_line_count = lines.iter().filter(|line| line.is_empty()).count();
1507        assert_eq!(empty_line_count, 3);
1508    }
1509
1510    #[test]
1511    fn test_escape_sequences_edge_cases() {
1512        // Test edge cases
1513        assert_eq!(ScalarValue::parse_escape_sequences(""), "");
1514        assert_eq!(ScalarValue::parse_escape_sequences("\\"), "\\");
1515        assert_eq!(
1516            ScalarValue::parse_escape_sequences("no escapes"),
1517            "no escapes"
1518        );
1519        assert_eq!(ScalarValue::parse_escape_sequences("\\\\\\\\"), "\\\\");
1520    }
1521
1522    #[test]
1523    fn test_double_quoted_with_escapes() {
1524        // Test that double-quoted scalars properly escape and unescape
1525        let original = "Hello\nWorld\tπŸ˜ƒ";
1526        let scalar = ScalarValue::double_quoted(original);
1527        let yaml_string = scalar.to_yaml_string();
1528
1529        // Should contain escaped sequences
1530        assert_eq!(yaml_string, "\"Hello\\nWorld\\t\\U0001F603\"");
1531
1532        // Parse it back
1533        let parsed = ScalarValue::parse_escape_sequences(&yaml_string[1..yaml_string.len() - 1]);
1534        assert_eq!(parsed, original);
1535    }
1536
1537    #[test]
1538    fn test_unicode_output_formatting() {
1539        // Test that Unicode characters are properly formatted in output
1540        let scalar = ScalarValue::double_quoted("Hello δΈ–η•Œ 🌍");
1541        let yaml_string = scalar.to_yaml_string();
1542
1543        // Should escape non-ASCII characters
1544        assert_eq!(yaml_string, "\"Hello \\u4E16\\u754C \\U0001F30D\"");
1545
1546        // But the internal value should remain unchanged
1547        assert_eq!(scalar.value(), "Hello δΈ–η•Œ 🌍");
1548    }
1549
1550    #[test]
1551    #[cfg(feature = "base64")]
1552    fn test_binary_data_encoding() {
1553        // Test creating binary scalar from raw bytes
1554        let data = b"Hello, World!";
1555        let scalar = ScalarValue::binary(data);
1556
1557        assert!(scalar.is_binary());
1558        assert_eq!(scalar.scalar_type(), ScalarType::Binary);
1559
1560        // Should produce valid base64
1561        let yaml_output = scalar.to_yaml_string();
1562        assert!(yaml_output.starts_with("!!binary "));
1563
1564        // Should be able to decode back to original data
1565        if let Some(decoded_result) = scalar.as_binary() {
1566            let decoded = decoded_result.expect("Should decode successfully");
1567            assert_eq!(decoded, data);
1568        } else {
1569            panic!("Should be able to extract binary data");
1570        }
1571    }
1572
1573    #[test]
1574    #[cfg(feature = "base64")]
1575    fn test_base64_encoding_decoding() {
1576        // Test various byte sequences
1577        let test_cases = [
1578            b"".as_slice(),
1579            b"A",
1580            b"AB",
1581            b"ABC",
1582            b"ABCD",
1583            b"Hello, World!",
1584            &[0, 1, 2, 3, 255, 254, 253],
1585        ];
1586
1587        for data in test_cases {
1588            let encoded = base64_encode(data);
1589            let decoded = base64_decode(&encoded).expect("Should decode successfully");
1590            assert_eq!(decoded, data, "Failed for data: {:?}", data);
1591        }
1592    }
1593
1594    #[test]
1595    fn test_timestamp_creation_and_validation() {
1596        // Test various timestamp formats
1597        let valid_timestamps = [
1598            "2023-12-25",
1599            "2023-12-25T10:30:45",
1600            "2023-12-25 10:30:45",
1601            "2023-12-25T10:30:45Z",
1602            "2001-12-14 21:59:43.10 -5", // Space-separated with timezone
1603            "2001-12-15T02:59:43.1Z",    // ISO 8601
1604            "2001-12-14t21:59:43.10-05:00", // Lowercase t
1605        ];
1606
1607        for ts in valid_timestamps {
1608            let scalar = ScalarValue::timestamp(ts);
1609            assert!(scalar.is_timestamp());
1610            assert_eq!(scalar.scalar_type(), ScalarType::Timestamp);
1611            assert_eq!(scalar.value(), ts);
1612
1613            let yaml_output = scalar.to_yaml_string();
1614            assert_eq!(yaml_output, format!("!!timestamp {}", ts));
1615
1616            // Test auto-detection recognizes it as timestamp
1617            let auto_scalar = ScalarValue::parse(ts);
1618            assert_eq!(
1619                auto_scalar.scalar_type(),
1620                ScalarType::Timestamp,
1621                "Failed to auto-detect '{}' as timestamp",
1622                ts
1623            );
1624        }
1625
1626        // Test invalid timestamps are not recognized
1627        let invalid_timestamps = [
1628            "not-a-date",
1629            "2023-13-01", // Invalid month
1630            "2023-12-32", // Invalid day
1631            "12:34:56",   // Time only (should be String)
1632            "2023/12/25", // Wrong separator
1633        ];
1634
1635        for ts in invalid_timestamps {
1636            let auto_scalar = ScalarValue::parse(ts);
1637            assert_ne!(
1638                auto_scalar.scalar_type(),
1639                ScalarType::Timestamp,
1640                "'{}' should not be detected as timestamp",
1641                ts
1642            );
1643        }
1644    }
1645
1646    #[test]
1647    fn test_regex_creation() {
1648        let pattern = r"^\d{3}-\d{2}-\d{4}$";
1649        let scalar = ScalarValue::regex(pattern);
1650
1651        assert!(scalar.is_regex());
1652        assert_eq!(scalar.scalar_type(), ScalarType::Regex);
1653        assert_eq!(scalar.value(), pattern);
1654
1655        let yaml_output = scalar.to_yaml_string();
1656        assert_eq!(yaml_output, format!("!!regex {}", pattern));
1657    }
1658
1659    #[test]
1660    fn test_regex_edge_cases() {
1661        // Test empty pattern
1662        let empty_regex = ScalarValue::regex("");
1663        assert!(empty_regex.is_regex());
1664        assert_eq!(empty_regex.value(), "");
1665        assert_eq!(empty_regex.to_yaml_string(), "!!regex ");
1666
1667        // Test pattern with special characters
1668        let special_chars = ScalarValue::regex(r"[.*+?^${}()|[\]\\]");
1669        assert!(special_chars.is_regex());
1670        assert_eq!(special_chars.value(), r"[.*+?^${}()|[\]\\]");
1671
1672        // Test unicode patterns
1673        let unicode_regex = ScalarValue::regex(r"\p{L}+");
1674        assert!(unicode_regex.is_regex());
1675        assert_eq!(unicode_regex.value(), r"\p{L}+");
1676
1677        // Test very long pattern
1678        let long_pattern = "a".repeat(1000);
1679        let long_regex = ScalarValue::regex(&long_pattern);
1680        assert!(long_regex.is_regex());
1681        assert_eq!(long_regex.value(), long_pattern);
1682
1683        // Test pattern with quotes and escapes
1684        let quoted_regex = ScalarValue::regex(r#"'quoted' and "double quoted" with \\ backslash"#);
1685        assert!(quoted_regex.is_regex());
1686        assert_eq!(
1687            quoted_regex.value(),
1688            r#"'quoted' and "double quoted" with \\ backslash"#
1689        );
1690    }
1691
1692    #[test]
1693    fn test_regex_type_coercion() {
1694        let regex_scalar = ScalarValue::regex(r"\d+");
1695
1696        // Test coercing regex to string
1697        let string_scalar = regex_scalar.coerce_to_type(ScalarType::String).unwrap();
1698        assert_eq!(string_scalar.scalar_type(), ScalarType::String);
1699        assert_eq!(string_scalar.value(), r"\d+");
1700        assert!(!string_scalar.is_regex());
1701
1702        // Test coercing string to regex
1703        let str_scalar = ScalarValue::string("test.*");
1704        let regex_from_string = str_scalar.coerce_to_type(ScalarType::Regex).unwrap();
1705        assert_eq!(regex_from_string.scalar_type(), ScalarType::Regex);
1706        assert_eq!(regex_from_string.value(), "test.*");
1707        assert!(regex_from_string.is_regex());
1708
1709        // Test that regex cannot be coerced to number types
1710        assert!(regex_scalar.coerce_to_type(ScalarType::Integer).is_none());
1711        assert!(regex_scalar.coerce_to_type(ScalarType::Float).is_none());
1712        assert!(regex_scalar.coerce_to_type(ScalarType::Boolean).is_none());
1713    }
1714
1715    #[test]
1716    #[cfg(feature = "regex")]
1717    fn test_regex_compilation() {
1718        // Test as_regex() with a regex scalar
1719        let regex_scalar = ScalarValue::regex(r"\d{3}-\d{4}");
1720        let compiled = regex_scalar.as_regex().unwrap();
1721        assert!(compiled.is_match("555-1234"));
1722        assert!(!compiled.is_match("not-a-phone"));
1723
1724        // Test as_regex() with a non-regex scalar returns None
1725        let string_scalar = ScalarValue::string("not a regex");
1726        assert!(string_scalar.as_regex().is_none());
1727
1728        // Test try_as_regex() with any scalar type
1729        let pattern_scalar = ScalarValue::string(r"^\w+@\w+\.\w+$");
1730        let email_regex = pattern_scalar.try_as_regex().unwrap();
1731        assert!(email_regex.is_match("test@example.com"));
1732        assert!(!email_regex.is_match("not-an-email"));
1733
1734        // Test with invalid regex pattern
1735        let invalid_scalar = ScalarValue::regex(r"[invalid(");
1736        assert!(invalid_scalar.as_regex().is_none());
1737
1738        // Test try_as_regex() with invalid pattern returns error
1739        let invalid_pattern = ScalarValue::string(r"[invalid(");
1740        assert!(invalid_pattern.try_as_regex().is_err());
1741    }
1742
1743    #[test]
1744    #[cfg(feature = "regex")]
1745    fn test_regex_extraction_use_cases() {
1746        // Test extracting and using regex for validation
1747        let validation_rules = [
1748            ScalarValue::regex(r"^\d{5}$"),                 // ZIP code
1749            ScalarValue::regex(r"^[A-Z]{2}$"),              // State code
1750            ScalarValue::regex(r"^\(\d{3}\) \d{3}-\d{4}$"), // Phone number
1751        ];
1752
1753        let test_values = ["12345", "CA", "(555) 123-4567"];
1754
1755        for (rule, value) in validation_rules.iter().zip(test_values.iter()) {
1756            let regex = rule.as_regex().unwrap();
1757            assert!(regex.is_match(value), "Pattern should match {}", value);
1758        }
1759
1760        // Test with complex regex patterns
1761        let email_regex = ScalarValue::regex(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$");
1762        let compiled = email_regex.as_regex().unwrap();
1763        assert!(compiled.is_match("user@example.com"));
1764        assert!(compiled.is_match("test.user+tag@sub.domain.org"));
1765        assert!(!compiled.is_match("invalid.email"));
1766
1767        // Test extracting capture groups
1768        let version_regex = ScalarValue::regex(r"^v(\d+)\.(\d+)\.(\d+)$");
1769        let compiled = version_regex.as_regex().unwrap();
1770        if let Some(captures) = compiled.captures("v1.2.3") {
1771            assert_eq!(captures.get(1).unwrap().as_str(), "1");
1772            assert_eq!(captures.get(2).unwrap().as_str(), "2");
1773            assert_eq!(captures.get(3).unwrap().as_str(), "3");
1774        } else {
1775            panic!("Should have matched version string");
1776        }
1777    }
1778
1779    #[test]
1780    fn test_type_coercion() {
1781        // Test coercing string to integer
1782        let str_scalar = ScalarValue::string("42");
1783        let int_scalar = str_scalar.coerce_to_type(ScalarType::Integer).unwrap();
1784        assert_eq!(int_scalar.scalar_type(), ScalarType::Integer);
1785        assert_eq!(int_scalar.value(), "42");
1786
1787        // Test coercing string to boolean
1788        let bool_scalar = ScalarValue::string("true")
1789            .coerce_to_type(ScalarType::Boolean)
1790            .unwrap();
1791        assert_eq!(bool_scalar.scalar_type(), ScalarType::Boolean);
1792        assert_eq!(bool_scalar.value(), "true");
1793
1794        // Test coercing boolean string variations
1795        let yes_scalar = ScalarValue::string("yes")
1796            .coerce_to_type(ScalarType::Boolean)
1797            .unwrap();
1798        assert_eq!(yes_scalar.value(), "true");
1799
1800        let no_scalar = ScalarValue::string("no")
1801            .coerce_to_type(ScalarType::Boolean)
1802            .unwrap();
1803        assert_eq!(no_scalar.value(), "false");
1804
1805        // Test failed coercion
1806        let str_scalar = ScalarValue::string("not_a_number");
1807        assert!(str_scalar.coerce_to_type(ScalarType::Integer).is_none());
1808    }
1809
1810    #[test]
1811    fn test_auto_type_detection() {
1812        // Test various automatic type detections
1813        assert_eq!(ScalarValue::auto_detect_type("42"), ScalarType::Integer);
1814        assert_eq!(ScalarValue::auto_detect_type("3.14"), ScalarType::Float);
1815        assert_eq!(ScalarValue::auto_detect_type("true"), ScalarType::Boolean);
1816        assert_eq!(ScalarValue::auto_detect_type("false"), ScalarType::Boolean);
1817        assert_eq!(ScalarValue::auto_detect_type("yes"), ScalarType::Boolean);
1818        assert_eq!(ScalarValue::auto_detect_type("null"), ScalarType::Null);
1819        assert_eq!(ScalarValue::auto_detect_type("~"), ScalarType::Null);
1820        assert_eq!(ScalarValue::auto_detect_type(""), ScalarType::Null);
1821        assert_eq!(
1822            ScalarValue::auto_detect_type("2023-12-25"),
1823            ScalarType::Timestamp
1824        );
1825        assert_eq!(
1826            ScalarValue::auto_detect_type("2023-12-25T10:30:45"),
1827            ScalarType::Timestamp
1828        );
1829        #[cfg(feature = "base64")]
1830        assert_eq!(
1831            ScalarValue::auto_detect_type("SGVsbG8gV29ybGQ="),
1832            ScalarType::Binary
1833        );
1834        #[cfg(not(feature = "base64"))]
1835        assert_eq!(
1836            ScalarValue::auto_detect_type("SGVsbG8gV29ybGQ="),
1837            ScalarType::String
1838        );
1839        assert_eq!(
1840            ScalarValue::auto_detect_type("hello world"),
1841            ScalarType::String
1842        );
1843    }
1844
1845    #[test]
1846    fn test_from_yaml_scalar_creation() {
1847        let int_scalar = ScalarValue::parse("123");
1848        assert_eq!(int_scalar.scalar_type(), ScalarType::Integer);
1849
1850        let bool_scalar = ScalarValue::parse("true");
1851        assert_eq!(bool_scalar.scalar_type(), ScalarType::Boolean);
1852
1853        let timestamp_scalar = ScalarValue::parse("2023-12-25");
1854        assert_eq!(timestamp_scalar.scalar_type(), ScalarType::Timestamp);
1855
1856        let string_scalar = ScalarValue::parse("hello world");
1857        assert_eq!(string_scalar.scalar_type(), ScalarType::String);
1858    }
1859
1860    #[test]
1861    fn test_timestamp_pattern_matching() {
1862        // Valid patterns
1863        assert!(ScalarValue::matches_iso8601_pattern("2023-12-25"));
1864        assert!(ScalarValue::matches_iso8601_pattern("2023-12-25T10:30:45"));
1865        assert!(ScalarValue::matches_iso8601_pattern("2023-12-25t10:30:45")); // Lowercase t
1866        assert!(ScalarValue::matches_iso8601_pattern("2023-12-25 10:30:45"));
1867        assert!(ScalarValue::matches_iso8601_pattern("2023-01-01T00:00:00"));
1868        assert!(ScalarValue::matches_iso8601_pattern(
1869            "2001-12-14t21:59:43.10-05:00"
1870        )); // Complex with lowercase t
1871
1872        // Invalid patterns
1873        assert!(!ScalarValue::matches_iso8601_pattern("2023-13-25")); // Invalid month
1874        assert!(!ScalarValue::matches_iso8601_pattern("23-12-25")); // Wrong year format
1875        assert!(!ScalarValue::matches_iso8601_pattern("2023/12/25")); // Wrong separator
1876        assert!(!ScalarValue::matches_iso8601_pattern("not-a-date"));
1877        assert!(!ScalarValue::matches_iso8601_pattern("2023"));
1878    }
1879
1880    #[test]
1881    #[cfg(feature = "base64")]
1882    fn test_base64_detection() {
1883        // Valid base64 strings
1884        assert!(ScalarValue::looks_like_base64("SGVsbG8=")); // "Hello"
1885        assert!(ScalarValue::looks_like_base64("V29ybGQ=")); // "World"
1886        assert!(ScalarValue::looks_like_base64("SGVsbG8gV29ybGQ=")); // "Hello World"
1887        assert!(ScalarValue::looks_like_base64("AAAA")); // All A's
1888
1889        // Invalid base64 strings
1890        assert!(!ScalarValue::looks_like_base64("Hello")); // No padding, wrong chars
1891        assert!(!ScalarValue::looks_like_base64("SGVsbG8")); // Missing padding (7 chars, should be 8 with padding)
1892        assert!(!ScalarValue::looks_like_base64("")); // Empty
1893        assert!(!ScalarValue::looks_like_base64("SGV@")); // Invalid character
1894        assert!(!ScalarValue::looks_like_base64("SGVsbG8g===")); // Too much padding
1895    }
1896
1897    #[test]
1898    #[cfg(feature = "base64")]
1899    fn test_binary_yaml_output_with_tags() {
1900        let data = b"Binary data here";
1901        let scalar = ScalarValue::binary(data);
1902        let yaml_output = scalar.to_yaml_string();
1903
1904        assert!(yaml_output.starts_with("!!binary "));
1905
1906        // Extract just the base64 part
1907        let base64_part = &yaml_output[9..]; // Remove "!!binary "
1908        let decoded = base64_decode(base64_part).expect("Should decode");
1909        assert_eq!(decoded, data);
1910    }
1911
1912    #[test]
1913    #[cfg(feature = "base64")]
1914    fn test_special_data_types_with_different_styles() {
1915        // Binary with different styles should still include tag
1916        let data = b"test";
1917        let binary_scalar = ScalarValue::binary(data);
1918
1919        // Even if we change style, binary type should maintain tag
1920        let mut styled_binary = binary_scalar;
1921        styled_binary.style = ScalarStyle::DoubleQuoted;
1922
1923        // The to_yaml_string should still respect the scalar type for tagging
1924        assert_eq!(styled_binary.to_yaml_string(), "!!binary \"dGVzdA==\"");
1925    }
1926
1927    #[test]
1928    fn test_type_checking_methods() {
1929        #[cfg(feature = "base64")]
1930        let binary_scalar = ScalarValue::binary(b"test");
1931        let timestamp_scalar = ScalarValue::timestamp("2023-12-25");
1932        let regex_scalar = ScalarValue::regex(r"\d+");
1933        let string_scalar = ScalarValue::string("hello");
1934
1935        // Test type checking methods
1936        #[cfg(feature = "base64")]
1937        assert!(binary_scalar.is_binary());
1938        #[cfg(feature = "base64")]
1939        assert!(!binary_scalar.is_timestamp());
1940        #[cfg(feature = "base64")]
1941        assert!(!binary_scalar.is_regex());
1942
1943        #[cfg(feature = "base64")]
1944        assert!(!timestamp_scalar.is_binary());
1945        assert!(timestamp_scalar.is_timestamp());
1946        assert!(!timestamp_scalar.is_regex());
1947
1948        #[cfg(feature = "base64")]
1949        assert!(!regex_scalar.is_binary());
1950        assert!(!regex_scalar.is_timestamp());
1951        assert!(regex_scalar.is_regex());
1952
1953        #[cfg(feature = "base64")]
1954        assert!(!string_scalar.is_binary());
1955        assert!(!string_scalar.is_timestamp());
1956        assert!(!string_scalar.is_regex());
1957    }
1958
1959    #[test]
1960    fn test_binary_number_parsing() {
1961        // Test binary number parsing (0b prefix)
1962        assert_eq!(ScalarValue::parse_integer("0b1010"), Some(10));
1963        assert_eq!(ScalarValue::parse_integer("0b11111111"), Some(255));
1964        assert_eq!(ScalarValue::parse_integer("0B101"), Some(5)); // Uppercase B
1965        assert_eq!(ScalarValue::parse_integer("-0b1010"), Some(-10));
1966        assert_eq!(ScalarValue::parse_integer("+0b101"), Some(5));
1967
1968        // Test auto-detection
1969        assert_eq!(ScalarValue::auto_detect_type("0b1010"), ScalarType::Integer);
1970        assert_eq!(
1971            ScalarValue::auto_detect_type("0B11111111"),
1972            ScalarType::Integer
1973        );
1974
1975        // Test invalid binary
1976        assert_eq!(ScalarValue::parse_integer("0b1012"), None); // Contains invalid digit
1977        assert_eq!(ScalarValue::parse_integer("0b"), None); // Empty after prefix
1978    }
1979
1980    #[test]
1981    fn test_modern_octal_number_parsing() {
1982        // Test modern octal number parsing (0o prefix)
1983        assert_eq!(ScalarValue::parse_integer("0o755"), Some(493)); // 7*64 + 5*8 + 5
1984        assert_eq!(ScalarValue::parse_integer("0o644"), Some(420)); // 6*64 + 4*8 + 4
1985        assert_eq!(ScalarValue::parse_integer("0O777"), Some(511)); // Uppercase O
1986        assert_eq!(ScalarValue::parse_integer("-0o755"), Some(-493));
1987        assert_eq!(ScalarValue::parse_integer("+0o644"), Some(420));
1988
1989        // Test auto-detection
1990        assert_eq!(ScalarValue::auto_detect_type("0o755"), ScalarType::Integer);
1991        assert_eq!(ScalarValue::auto_detect_type("0O644"), ScalarType::Integer);
1992
1993        // Test invalid octal
1994        assert_eq!(ScalarValue::parse_integer("0o789"), None); // Contains invalid digit
1995        assert_eq!(ScalarValue::parse_integer("0o"), None); // Empty after prefix
1996    }
1997
1998    #[test]
1999    fn test_legacy_octal_number_parsing() {
2000        // Test legacy octal number parsing (0 prefix)
2001        assert_eq!(ScalarValue::parse_integer("0755"), Some(493));
2002        assert_eq!(ScalarValue::parse_integer("0644"), Some(420));
2003        assert_eq!(ScalarValue::parse_integer("0777"), Some(511));
2004
2005        // Test auto-detection
2006        assert_eq!(ScalarValue::auto_detect_type("0755"), ScalarType::Integer);
2007        assert_eq!(ScalarValue::auto_detect_type("0644"), ScalarType::Integer);
2008
2009        // Test edge cases
2010        assert_eq!(ScalarValue::parse_integer("0"), Some(0)); // Single zero
2011        assert_eq!(ScalarValue::parse_integer("00"), Some(0)); // Double zero
2012
2013        // Numbers starting with 0 but containing 8 or 9 should fail as octal
2014        assert_eq!(ScalarValue::parse_integer("0789"), None);
2015        assert_eq!(ScalarValue::parse_integer("0128"), None);
2016    }
2017
2018    #[test]
2019    fn test_hexadecimal_number_parsing() {
2020        // Test hexadecimal number parsing (0x prefix) - should still work
2021        assert_eq!(ScalarValue::parse_integer("0xFF"), Some(255));
2022        assert_eq!(ScalarValue::parse_integer("0x1A"), Some(26));
2023        assert_eq!(ScalarValue::parse_integer("0XFF"), Some(255)); // Uppercase X
2024        assert_eq!(ScalarValue::parse_integer("-0xFF"), Some(-255));
2025        assert_eq!(ScalarValue::parse_integer("+0x1A"), Some(26));
2026
2027        // Test auto-detection
2028        assert_eq!(ScalarValue::auto_detect_type("0xFF"), ScalarType::Integer);
2029        assert_eq!(ScalarValue::auto_detect_type("0X1A"), ScalarType::Integer);
2030    }
2031
2032    #[test]
2033    fn test_decimal_number_parsing() {
2034        // Test decimal number parsing (no prefix) - should still work
2035        assert_eq!(ScalarValue::parse_integer("42"), Some(42));
2036        assert_eq!(ScalarValue::parse_integer("123"), Some(123));
2037        assert_eq!(ScalarValue::parse_integer("-42"), Some(-42));
2038        assert_eq!(ScalarValue::parse_integer("+123"), Some(123));
2039
2040        // Test auto-detection
2041        assert_eq!(ScalarValue::auto_detect_type("42"), ScalarType::Integer);
2042        assert_eq!(ScalarValue::auto_detect_type("-123"), ScalarType::Integer);
2043    }
2044
2045    #[test]
2046    fn test_number_format_yaml_output() {
2047        // Test that different number formats are properly detected and output
2048        let binary_scalar = ScalarValue::parse("0b1010");
2049        assert_eq!(binary_scalar.scalar_type(), ScalarType::Integer);
2050        assert_eq!(binary_scalar.value(), "0b1010");
2051
2052        let octal_scalar = ScalarValue::parse("0o755");
2053        assert_eq!(octal_scalar.scalar_type(), ScalarType::Integer);
2054        assert_eq!(octal_scalar.value(), "0o755");
2055
2056        let hex_scalar = ScalarValue::parse("0xFF");
2057        assert_eq!(hex_scalar.scalar_type(), ScalarType::Integer);
2058        assert_eq!(hex_scalar.value(), "0xFF");
2059
2060        let legacy_octal_scalar = ScalarValue::parse("0755");
2061        assert_eq!(legacy_octal_scalar.scalar_type(), ScalarType::Integer);
2062        assert_eq!(legacy_octal_scalar.value(), "0755");
2063    }
2064}