Skip to main content

yaml_edit/
scalar.rs

1//! Scalar value wrapper with proper escaping and style support.
2
3use std::fmt;
4
5#[cfg(feature = "base64")]
6use base64::{engine::general_purpose, Engine as _};
7
/// Encode raw bytes as base64 text using the `STANDARD` general-purpose engine.
///
/// Only compiled when the `base64` feature is enabled.
#[cfg(feature = "base64")]
fn base64_encode(input: &[u8]) -> String {
    let engine = &general_purpose::STANDARD;
    engine.encode(input)
}
13
/// Decode base64 text back into raw bytes.
///
/// Surrounding whitespace is trimmed before decoding. A decoding failure is
/// reported as a human-readable `String` prefixed with `Base64 decode error: `.
#[cfg(feature = "base64")]
fn base64_decode(input: &str) -> Result<Vec<u8>, String> {
    let trimmed = input.trim();
    match general_purpose::STANDARD.decode(trimmed) {
        Ok(bytes) => Ok(bytes),
        Err(e) => Err(format!("Base64 decode error: {}", e)),
    }
}
21
/// How a scalar is written out in YAML text.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ScalarStyle {
    /// Unquoted (plain) scalar.
    Plain,
    /// Scalar wrapped in single quotes (`'...'`).
    SingleQuoted,
    /// Scalar wrapped in double quotes (`"..."`).
    DoubleQuoted,
    /// Literal block scalar, introduced by `|`.
    Literal,
    /// Folded block scalar, introduced by `>`.
    Folded,
}
36
/// The data type a scalar value is tagged with.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ScalarType {
    /// Text.
    String,
    /// Whole number.
    Integer,
    /// Floating-point number.
    Float,
    /// `true` / `false` style value.
    Boolean,
    /// Absent value.
    Null,
    /// Binary payload stored as base64 text (requires the `base64` feature).
    #[cfg(feature = "base64")]
    Binary,
    /// Date or date-time value.
    Timestamp,
    /// Regular-expression pattern.
    Regex,
}
58
59/// A scalar value with metadata about its style and content
60#[derive(Debug, Clone, PartialEq, Eq)]
61pub struct ScalarValue {
62    /// The actual value
63    value: String,
64    /// The style to use when rendering
65    style: ScalarStyle,
66    /// The type of the scalar
67    scalar_type: ScalarType,
68}
69
70impl ScalarValue {
71    /// Create a scalar value explicitly treating it as a string (no type auto-detection)
72    ///
73    /// This method always creates a `String` type scalar. The value will be properly
74    /// quoted if needed when rendering to YAML, but no type detection is performed.
75    ///
76    /// # Examples
77    ///
78    /// ```
79    /// use yaml_edit::ScalarValue;
80    ///
81    /// let scalar = ScalarValue::string("123");
82    /// assert_eq!(scalar.scalar_type(), yaml_edit::ScalarType::String);
83    /// // Renders with quotes to distinguish from integer
84    /// assert_eq!(scalar.to_yaml_string(), "'123'");
85    ///
86    /// let scalar = ScalarValue::string("true");
87    /// assert_eq!(scalar.scalar_type(), yaml_edit::ScalarType::String);
88    /// // Renders with quotes to distinguish from boolean
89    /// assert_eq!(scalar.to_yaml_string(), "'true'");
90    ///
91    /// let scalar = ScalarValue::string("hello");
92    /// assert_eq!(scalar.scalar_type(), yaml_edit::ScalarType::String);
93    /// // Plain strings don't need quotes
94    /// assert_eq!(scalar.to_yaml_string(), "hello");
95    /// ```
96    ///
97    /// For YAML-style type detection (parsing "123" as Integer, "true" as Boolean),
98    /// use [`ScalarValue::parse()`] instead.
99    pub fn string(value: impl Into<String>) -> Self {
100        let value = value.into();
101        let style = Self::detect_style(&value);
102        // Detect the type - default to String for user-provided values
103        let scalar_type = ScalarType::String;
104        Self {
105            value,
106            style,
107            scalar_type,
108        }
109    }
110
111    /// Parse escape sequences in a double-quoted string
112    pub fn parse_escape_sequences(text: &str) -> String {
113        let mut result = String::with_capacity(text.len()); // Pre-allocate
114        let mut chars = text.chars().peekable();
115
116        while let Some(ch) = chars.next() {
117            if ch == '\\' {
118                if let Some(&escaped) = chars.peek() {
119                    chars.next(); // consume the escaped character
120                    match escaped {
121                        // Standard escape sequences
122                        'n' => result.push('\n'),
123                        't' => result.push('\t'),
124                        'r' => result.push('\r'),
125                        'b' => result.push('\x08'),
126                        'f' => result.push('\x0C'),
127                        'a' => result.push('\x07'), // bell
128                        'e' => result.push('\x1B'), // escape
129                        'v' => result.push('\x0B'), // vertical tab
130                        '0' => result.push('\0'),   // null
131                        '\\' => result.push('\\'),
132                        '"' => result.push('"'),
133                        '\'' => result.push('\''),
134                        '/' => result.push('/'),
135                        // Line break escape (YAML specific)
136                        ' ' => {
137                            // Escaped space followed by line break - line folding
138                            if let Some(&'\n') = chars.peek() {
139                                chars.next(); // consume the newline
140                                              // In YAML, escaped line breaks are folded to nothing
141                                continue;
142                            } else {
143                                result.push(' ');
144                            }
145                        }
146                        '\n' => {
147                            // Escaped line break - removes the line break
148                            continue;
149                        }
150                        // Unicode escapes
151                        'x' => {
152                            // \xNN - 2-digit hex
153                            let mut hex_chars = [0u8; 2];
154                            let mut count = 0;
155                            for (i, ch) in chars.by_ref().take(2).enumerate() {
156                                if let Some(digit) = ch.to_digit(16) {
157                                    hex_chars[i] = digit as u8;
158                                    count += 1;
159                                } else {
160                                    // Put back invalid char
161                                    result.push('\\');
162                                    result.push('x');
163                                    for &hex_char in hex_chars.iter().take(count) {
164                                        result.push(char::from_digit(hex_char as u32, 16).unwrap());
165                                    }
166                                    result.push(ch);
167                                    break;
168                                }
169                            }
170                            if count == 2 {
171                                let code = hex_chars[0] * 16 + hex_chars[1];
172                                result.push(code as char);
173                            } else if count > 0 {
174                                // Incomplete hex escape
175                                result.push('\\');
176                                result.push('x');
177                                for &hex_char in hex_chars.iter().take(count) {
178                                    result.push(char::from_digit(hex_char as u32, 16).unwrap());
179                                }
180                            }
181                        }
182                        'u' => {
183                            // \uNNNN - 4-digit hex
184                            let hex_digits: String = chars.by_ref().take(4).collect();
185                            if hex_digits.len() == 4 {
186                                if let Ok(code) = u16::from_str_radix(&hex_digits, 16) {
187                                    if let Some(unicode_char) = char::from_u32(code as u32) {
188                                        result.push(unicode_char);
189                                    } else {
190                                        // Invalid Unicode code point
191                                        result.push('\\');
192                                        result.push('u');
193                                        result.push_str(&hex_digits);
194                                    }
195                                } else {
196                                    // Invalid hex
197                                    result.push('\\');
198                                    result.push('u');
199                                    result.push_str(&hex_digits);
200                                }
201                            } else {
202                                // Incomplete hex escape
203                                result.push('\\');
204                                result.push('u');
205                                result.push_str(&hex_digits);
206                            }
207                        }
208                        'U' => {
209                            // \UNNNNNNNN - 8-digit hex
210                            let hex_digits: String = chars.by_ref().take(8).collect();
211                            if hex_digits.len() == 8 {
212                                if let Ok(code) = u32::from_str_radix(&hex_digits, 16) {
213                                    if let Some(unicode_char) = char::from_u32(code) {
214                                        result.push(unicode_char);
215                                    } else {
216                                        // Invalid Unicode code point
217                                        result.push('\\');
218                                        result.push('U');
219                                        result.push_str(&hex_digits);
220                                    }
221                                } else {
222                                    // Invalid hex
223                                    result.push('\\');
224                                    result.push('U');
225                                    result.push_str(&hex_digits);
226                                }
227                            } else {
228                                // Incomplete hex escape
229                                result.push('\\');
230                                result.push('U');
231                                result.push_str(&hex_digits);
232                            }
233                        }
234                        // Unknown escape sequence - preserve as literal
235                        _ => {
236                            result.push('\\');
237                            result.push(escaped);
238                        }
239                    }
240                } else {
241                    // Backslash at end of string
242                    result.push('\\');
243                }
244            } else {
245                result.push(ch);
246            }
247        }
248
249        result
250    }
251
252    /// Create a new scalar with a specific style
253    pub fn with_style(value: impl Into<String>, style: ScalarStyle) -> Self {
254        Self {
255            value: value.into(),
256            style,
257            scalar_type: ScalarType::String,
258        }
259    }
260
261    /// Create a plain scalar
262    pub fn plain(value: impl Into<String>) -> Self {
263        Self::with_style(value, ScalarStyle::Plain)
264    }
265
266    /// Create a single-quoted scalar
267    pub fn single_quoted(value: impl Into<String>) -> Self {
268        Self::with_style(value, ScalarStyle::SingleQuoted)
269    }
270
271    /// Create a double-quoted scalar
272    pub fn double_quoted(value: impl Into<String>) -> Self {
273        Self::with_style(value, ScalarStyle::DoubleQuoted)
274    }
275
276    /// Create a literal scalar
277    pub fn literal(value: impl Into<String>) -> Self {
278        Self::with_style(value, ScalarStyle::Literal)
279    }
280
281    /// Create a folded scalar
282    pub fn folded(value: impl Into<String>) -> Self {
283        Self::with_style(value, ScalarStyle::Folded)
284    }
285
286    /// Create a null scalar
287    pub fn null() -> Self {
288        Self {
289            value: "null".to_string(),
290            style: ScalarStyle::Plain,
291            scalar_type: ScalarType::Null,
292        }
293    }
294
295    /// Create a binary scalar from raw bytes
296    #[cfg(feature = "base64")]
297    pub fn binary(data: &[u8]) -> Self {
298        let encoded = base64_encode(data);
299        Self {
300            value: encoded,
301            style: ScalarStyle::Plain,
302            scalar_type: ScalarType::Binary,
303        }
304    }
305
306    /// Create a timestamp scalar
307    pub fn timestamp(value: impl Into<String>) -> Self {
308        Self {
309            value: value.into(),
310            style: ScalarStyle::Plain,
311            scalar_type: ScalarType::Timestamp,
312        }
313    }
314
315    /// Create a regex scalar
316    pub fn regex(pattern: impl Into<String>) -> Self {
317        Self {
318            value: pattern.into(),
319            style: ScalarStyle::Plain,
320            scalar_type: ScalarType::Regex,
321        }
322    }
323
324    /// Get the raw value
325    pub fn value(&self) -> &str {
326        &self.value
327    }
328
329    /// Get the style
330    pub fn style(&self) -> ScalarStyle {
331        self.style
332    }
333
334    /// Get the scalar type
335    pub fn scalar_type(&self) -> ScalarType {
336        self.scalar_type
337    }
338
339    /// Try to parse this scalar as an `i64`.
340    ///
341    /// Returns `None` if the scalar type is not `Integer`.
342    pub fn to_i64(&self) -> Option<i64> {
343        if self.scalar_type == ScalarType::Integer {
344            Self::parse_integer(&self.value)
345        } else {
346            None
347        }
348    }
349
350    /// Try to parse this scalar as an `f64`.
351    ///
352    /// Returns `None` if the scalar type is not `Float`.
353    pub fn to_f64(&self) -> Option<f64> {
354        if self.scalar_type == ScalarType::Float {
355            self.value.trim().parse::<f64>().ok()
356        } else {
357            None
358        }
359    }
360
361    /// Try to parse this scalar as a `bool`.
362    ///
363    /// Returns `None` if the scalar type is not `Boolean`.
364    /// Recognizes: `true`, `false`, `yes`, `no`, `on`, `off` (case-insensitive).
365    pub fn to_bool(&self) -> Option<bool> {
366        if self.scalar_type == ScalarType::Boolean {
367            match self.value.to_lowercase().as_str() {
368                "true" | "yes" | "on" => Some(true),
369                "false" | "no" | "off" => Some(false),
370                _ => None,
371            }
372        } else {
373            None
374        }
375    }
376
377    /// Extract binary data if this is a binary scalar
378    #[cfg(feature = "base64")]
379    pub fn as_binary(&self) -> Option<Result<Vec<u8>, String>> {
380        match self.scalar_type {
381            ScalarType::Binary => Some(base64_decode(&self.value)),
382            _ => None,
383        }
384    }
385
386    /// Check if this is a binary scalar
387    #[cfg(feature = "base64")]
388    pub fn is_binary(&self) -> bool {
389        self.scalar_type == ScalarType::Binary
390    }
391
392    /// Check if this is a timestamp scalar
393    pub fn is_timestamp(&self) -> bool {
394        self.scalar_type == ScalarType::Timestamp
395    }
396
397    /// Check if this is a regex scalar
398    pub fn is_regex(&self) -> bool {
399        self.scalar_type == ScalarType::Regex
400    }
401
402    /// Compile and return a Regex object if this is a regex scalar
403    ///
404    /// This method is only available when the `regex` feature is enabled.
405    /// Returns None if this is not a regex scalar or if the pattern is invalid.
406    ///
407    /// # Example
408    /// ```
409    /// # #[cfg(feature = "regex")]
410    /// # {
411    /// use yaml_edit::ScalarValue;
412    ///
413    /// let scalar = ScalarValue::regex(r"\d{3}-\d{4}");
414    /// let regex = scalar.as_regex().unwrap();
415    /// assert!(regex.is_match("555-1234"));
416    /// # }
417    /// ```
418    #[cfg(feature = "regex")]
419    pub fn as_regex(&self) -> Option<regex::Regex> {
420        if self.scalar_type == ScalarType::Regex {
421            regex::Regex::new(&self.value).ok()
422        } else {
423            None
424        }
425    }
426
427    /// Try to compile this scalar as a regex, regardless of its type
428    ///
429    /// This method is only available when the `regex` feature is enabled.
430    /// This will attempt to compile the scalar value as a regex pattern,
431    /// even if it's not marked with the !!regex tag.
432    ///
433    /// # Example
434    /// ```
435    /// # #[cfg(feature = "regex")]
436    /// # {
437    /// use yaml_edit::ScalarValue;
438    ///
439    /// let scalar = ScalarValue::string(r"\d+");  // Plain string scalar
440    /// let regex = scalar.try_as_regex().unwrap();
441    /// assert!(regex.is_match("123"));
442    /// # }
443    /// ```
444    #[cfg(feature = "regex")]
445    pub fn try_as_regex(&self) -> Result<regex::Regex, regex::Error> {
446        regex::Regex::new(&self.value)
447    }
448
449    /// Try to coerce this scalar to the specified type
450    pub fn coerce_to_type(&self, target_type: ScalarType) -> Option<ScalarValue> {
451        if self.scalar_type == target_type {
452            return Some(self.clone());
453        }
454
455        match target_type {
456            ScalarType::String => Some(ScalarValue {
457                value: self.value.clone(),
458                style: ScalarStyle::Plain,
459                scalar_type: ScalarType::String,
460            }),
461            ScalarType::Integer => Self::parse_integer(&self.value).map(ScalarValue::from),
462            ScalarType::Float => self.value.parse::<f64>().ok().map(ScalarValue::from),
463            ScalarType::Boolean => match self.value.to_lowercase().as_str() {
464                "true" | "yes" | "on" | "1" => Some(ScalarValue::from(true)),
465                "false" | "no" | "off" | "0" => Some(ScalarValue::from(false)),
466                _ => None,
467            },
468            ScalarType::Null => match self.value.to_lowercase().as_str() {
469                "null" | "~" | "" => Some(ScalarValue::null()),
470                _ => None,
471            },
472            #[cfg(feature = "base64")]
473            ScalarType::Binary => {
474                // Try to decode as base64 to verify it's valid binary data
475                if base64_decode(&self.value).is_ok() {
476                    Some(ScalarValue {
477                        value: self.value.clone(),
478                        style: ScalarStyle::Plain,
479                        scalar_type: ScalarType::Binary,
480                    })
481                } else {
482                    None
483                }
484            }
485            ScalarType::Timestamp => {
486                // Basic timestamp format validation
487                if self.is_valid_timestamp(&self.value) {
488                    Some(ScalarValue::timestamp(&self.value))
489                } else {
490                    None
491                }
492            }
493            ScalarType::Regex => {
494                // For regex, just convert the value
495                Some(ScalarValue::regex(&self.value))
496            }
497        }
498    }
499
500    /// Parse an integer with support for various formats
501    /// Supports: decimal, hexadecimal (0x), binary (0b), octal (0o and legacy 0)
502    pub(crate) fn parse_integer(value: &str) -> Option<i64> {
503        let value = value.trim();
504
505        // Handle negative numbers
506        let (is_negative, value) = if let Some(stripped) = value.strip_prefix('-') {
507            (true, stripped)
508        } else if let Some(stripped) = value.strip_prefix('+') {
509            (false, stripped)
510        } else {
511            (false, value)
512        };
513
514        let parsed = if let Some(hex_part) = value
515            .strip_prefix("0x")
516            .or_else(|| value.strip_prefix("0X"))
517        {
518            // Hexadecimal
519            i64::from_str_radix(hex_part, 16).ok()
520        } else if let Some(bin_part) = value
521            .strip_prefix("0b")
522            .or_else(|| value.strip_prefix("0B"))
523        {
524            // Binary
525            i64::from_str_radix(bin_part, 2).ok()
526        } else if let Some(oct_part) = value
527            .strip_prefix("0o")
528            .or_else(|| value.strip_prefix("0O"))
529        {
530            // Modern octal
531            i64::from_str_radix(oct_part, 8).ok()
532        } else if value.starts_with('0')
533            && value.len() > 1
534            && value.chars().all(|c| c.is_ascii_digit())
535        {
536            // Legacy octal (starts with 0 but not 0x, 0b, 0o)
537            i64::from_str_radix(value, 8).ok()
538        } else {
539            // Decimal
540            value.parse::<i64>().ok()
541        };
542
543        parsed.map(|n| if is_negative { -n } else { n })
544    }
545
546    /// Auto-detect the most appropriate scalar type from a string value
547    pub fn auto_detect_type(value: &str) -> ScalarType {
548        // Check for null values first
549        match value.to_lowercase().as_str() {
550            "null" | "~" | "" => return ScalarType::Null,
551            _ => {}
552        }
553
554        // Check for boolean values
555        match value.to_lowercase().as_str() {
556            "true" | "false" | "yes" | "no" | "on" | "off" => return ScalarType::Boolean,
557            _ => {}
558        }
559
560        // Check for numbers with various formats
561        if Self::parse_integer(value).is_some() {
562            return ScalarType::Integer;
563        }
564        if value.parse::<f64>().is_ok() {
565            return ScalarType::Float;
566        }
567
568        // Check for timestamps (basic patterns)
569        if Self::is_valid_timestamp_static(value) {
570            return ScalarType::Timestamp;
571        }
572
573        // Check for binary data (base64)
574        #[cfg(feature = "base64")]
575        if Self::looks_like_base64(value) && base64_decode(value).is_ok() {
576            return ScalarType::Binary;
577        }
578
579        // Default to string
580        ScalarType::String
581    }
582
583    /// Parse a YAML scalar value with automatic type detection
584    ///
585    /// This method automatically detects the YAML type based on the content:
586    /// - "123" β†’ Integer
587    /// - "3.14" β†’ Float
588    /// - "true" / "false" β†’ Boolean
589    /// - "null" / "~" β†’ Null
590    /// - etc.
591    ///
592    /// # Examples
593    ///
594    /// ```
595    /// use yaml_edit::{ScalarValue, ScalarType};
596    ///
597    /// let scalar = ScalarValue::parse("123");
598    /// assert_eq!(scalar.scalar_type(), ScalarType::Integer);
599    /// assert_eq!(scalar.to_yaml_string(), "123");
600    ///
601    /// let scalar = ScalarValue::parse("true");
602    /// assert_eq!(scalar.scalar_type(), ScalarType::Boolean);
603    /// assert_eq!(scalar.to_yaml_string(), "true");
604    ///
605    /// let scalar = ScalarValue::parse("hello");
606    /// assert_eq!(scalar.scalar_type(), ScalarType::String);
607    /// assert_eq!(scalar.to_yaml_string(), "hello");
608    /// ```
609    ///
610    /// To create a String-type scalar without auto-detection (e.g., to represent
611    /// the string "123" rather than the integer 123), use [`ScalarValue::string()`] instead.
612    pub fn parse(value: impl Into<String>) -> Self {
613        let value = value.into();
614        let scalar_type = Self::auto_detect_type(&value);
615        // For non-string types, use Plain style (no quotes)
616        // For string types, detect appropriate style
617        let style = match scalar_type {
618            ScalarType::String => Self::detect_style(&value),
619            // All other types use plain style
620            _ => ScalarStyle::Plain,
621        };
622
623        Self {
624            value,
625            style,
626            scalar_type,
627        }
628    }
629
630    /// Create a ScalarValue from a Scalar syntax node, preserving the type from the lexer
631    ///
632    /// This extracts type information directly from the token kind (INT, BOOL, FLOAT, etc.)
633    /// rather than guessing based on heuristics. This is the correct way to convert
634    /// parsed YAML into ScalarValue.
635    pub fn from_scalar(scalar: &crate::yaml::Scalar) -> Self {
636        use crate::lex::SyntaxKind;
637        use rowan::ast::AstNode;
638
639        let value = scalar.as_string();
640
641        // Get the token kind from the first token in the scalar
642        let syntax_node = scalar.syntax();
643        let scalar_type = if let Some(token) = syntax_node.first_token() {
644            match token.kind() {
645                SyntaxKind::INT => ScalarType::Integer,
646                SyntaxKind::FLOAT => ScalarType::Float,
647                SyntaxKind::BOOL => ScalarType::Boolean,
648                SyntaxKind::NULL => ScalarType::Null,
649                SyntaxKind::STRING => ScalarType::String,
650                _ => ScalarType::String, // fallback
651            }
652        } else {
653            ScalarType::String
654        };
655
656        // Determine style based on the actual text (with quotes if present)
657        let raw_text = scalar.value();
658        let style = if raw_text.starts_with('"') && raw_text.ends_with('"') {
659            ScalarStyle::DoubleQuoted
660        } else if raw_text.starts_with('\'') && raw_text.ends_with('\'') {
661            ScalarStyle::SingleQuoted
662        } else {
663            ScalarStyle::Plain
664        };
665
666        Self {
667            value,
668            style,
669            scalar_type,
670        }
671    }
672
673    /// Check if a string looks like base64 encoded data
674    #[cfg(feature = "base64")]
675    fn looks_like_base64(value: &str) -> bool {
676        if value.is_empty() {
677            return false;
678        }
679
680        // Must be reasonable length and contain only base64 characters
681        // Also need to check that padding is only at the end
682        if value.len() < 4 || value.len() % 4 != 0 {
683            return false;
684        }
685
686        let padding_count = value.chars().filter(|&c| c == '=').count();
687        if padding_count > 2 {
688            return false;
689        }
690
691        // Check all characters are valid base64
692        if !value
693            .chars()
694            .all(|c| matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '+' | '/' | '='))
695        {
696            return false;
697        }
698
699        // Check that padding is only at the end
700        if padding_count > 0 {
701            let padding_start = value.len() - padding_count;
702            if !value[padding_start..].chars().all(|c| c == '=') {
703                return false;
704            }
705            // Check that non-padding part doesn't contain '='
706            if value[..padding_start].contains('=') {
707                return false;
708            }
709        }
710
711        // Final validation: try to decode it to ensure it's actually valid base64
712        // This will catch cases like "SGVs" which looks valid but isn't proper base64
713        base64_decode(value).is_ok()
714    }
715
716    /// Basic timestamp format validation
717    fn is_valid_timestamp(&self, value: &str) -> bool {
718        Self::is_valid_timestamp_static(value)
719    }
720
721    /// Static version of timestamp validation
722    fn is_valid_timestamp_static(value: &str) -> bool {
723        // Basic patterns for common timestamp formats
724        // ISO 8601: YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS etc.
725        if Self::matches_iso8601_pattern(value) {
726            return true;
727        }
728
729        // Unix timestamp (seconds since epoch)
730        if let Ok(timestamp) = value.parse::<u64>() {
731            // Reasonable range: between 1970 and 2100
732            return timestamp > 0 && timestamp < 4_102_444_800; // 2100-01-01
733        }
734
735        false
736    }
737
738    /// Simple pattern matching for ISO 8601 timestamps
739    fn matches_iso8601_pattern(value: &str) -> bool {
740        let chars: Vec<char> = value.chars().collect();
741
742        // Must be at least YYYY-MM-DD (10 chars)
743        if chars.len() < 10 {
744            return false;
745        }
746
747        // Check YYYY-MM-DD pattern
748        if !(chars[0..4].iter().all(|c| c.is_ascii_digit())
749            && chars[4] == '-'
750            && chars[5..7].iter().all(|c| c.is_ascii_digit())
751            && chars[7] == '-'
752            && chars[8..10].iter().all(|c| c.is_ascii_digit()))
753        {
754            return false;
755        }
756
757        // Validate month and day ranges (basic validation)
758        let month_str: String = chars[5..7].iter().collect();
759        let day_str: String = chars[8..10].iter().collect();
760
761        if let (Ok(month), Ok(day)) = (month_str.parse::<u8>(), day_str.parse::<u8>()) {
762            if !(1..=12).contains(&month) || !(1..=31).contains(&day) {
763                return false;
764            }
765        } else {
766            return false;
767        }
768
769        // If it's just YYYY-MM-DD, that's valid
770        if chars.len() == 10 {
771            return true;
772        }
773
774        // Check for time part: T or t or space followed by HH:MM:SS
775        if chars.len() >= 19 {
776            let sep = chars[10];
777            if (sep == 'T' || sep == 't' || sep == ' ')
778                && chars[11..13].iter().all(|c| c.is_ascii_digit())
779                && chars[13] == ':'
780                && chars[14..16].iter().all(|c| c.is_ascii_digit())
781                && chars[16] == ':'
782                && chars[17..19].iter().all(|c| c.is_ascii_digit())
783            {
784                // Validate hour, minute, second ranges
785                let hour_str: String = chars[11..13].iter().collect();
786                let minute_str: String = chars[14..16].iter().collect();
787                let second_str: String = chars[17..19].iter().collect();
788
789                if let (Ok(hour), Ok(minute), Ok(second)) = (
790                    hour_str.parse::<u8>(),
791                    minute_str.parse::<u8>(),
792                    second_str.parse::<u8>(),
793                ) {
794                    if hour > 23 || minute > 59 || second > 59 {
795                        return false;
796                    }
797                } else {
798                    return false;
799                }
800
801                return true;
802            }
803        }
804
805        false
806    }
807
808    /// Detect the appropriate style for a value
809    fn detect_style(value: &str) -> ScalarStyle {
810        // Check if value needs quoting
811        if Self::needs_quoting(value) {
812            // Prefer single quotes if no single quotes in value
813            if !value.contains('\'') {
814                ScalarStyle::SingleQuoted
815            } else {
816                ScalarStyle::DoubleQuoted
817            }
818        } else if value.contains('\n') {
819            // Multi-line strings use literal style
820            ScalarStyle::Literal
821        } else {
822            ScalarStyle::Plain
823        }
824    }
825
826    /// Check if a value needs quoting when treated as a string
827    fn needs_quoting(value: &str) -> bool {
828        // Empty string needs quotes
829        if value.is_empty() {
830            return true;
831        }
832
833        // Check for YAML keywords that would be misinterpreted
834        // These need quotes when we want them as strings
835        if value.eq_ignore_ascii_case("true")
836            || value.eq_ignore_ascii_case("false")
837            || value.eq_ignore_ascii_case("yes")
838            || value.eq_ignore_ascii_case("no")
839            || value.eq_ignore_ascii_case("on")
840            || value.eq_ignore_ascii_case("off")
841            || value.eq_ignore_ascii_case("null")
842            || value == "~"
843        {
844            return true;
845        }
846
847        // Also quote things that look like numbers to preserve them as strings
848        if value.parse::<f64>().is_ok() || Self::parse_integer(value).is_some() {
849            return true;
850        }
851
852        // Check if starts with special characters
853        if value.starts_with(|ch: char| {
854            matches!(ch, '-' | '?' | '[' | ']' | '{' | '}' | ',' | '>' | '<')
855        }) {
856            return true;
857        }
858
859        // Check for special characters that require quoting
860        // : and # need context-aware checking (only ambiguous before whitespace or at end)
861        let mut chars = value.chars().peekable();
862        while let Some(ch) = chars.next() {
863            match ch {
864                '&' | '*' | '!' | '|' | '\'' | '"' | '%' => return true,
865                ':' | '#' if chars.peek().map_or(true, |next| next.is_whitespace()) => {
866                    return true;
867                }
868                _ => {}
869            }
870        }
871
872        // Leading/trailing whitespace needs quotes
873        if value != value.trim() {
874            return true;
875        }
876
877        false
878    }
879
880    /// Render the scalar as a YAML string with proper escaping
881    pub fn to_yaml_string(&self) -> String {
882        // For special data types, always include the tag regardless of style
883        let tag_prefix = match self.scalar_type {
884            #[cfg(feature = "base64")]
885            ScalarType::Binary => "!!binary ",
886            ScalarType::Timestamp => "!!timestamp ",
887            ScalarType::Regex => "!!regex ",
888            _ => "",
889        };
890
891        let content = match self.style {
892            ScalarStyle::Plain => {
893                // Check if we need to quote based on type vs content
894                match self.scalar_type {
895                    ScalarType::String => {
896                        // For strings, quote if the content looks like a special value
897                        if Self::needs_quoting(&self.value) {
898                            self.to_single_quoted()
899                        } else {
900                            self.value.clone()
901                        }
902                    }
903                    // For non-strings, output as plain (unquoted)
904                    ScalarType::Integer
905                    | ScalarType::Float
906                    | ScalarType::Boolean
907                    | ScalarType::Null
908                    | ScalarType::Timestamp
909                    | ScalarType::Regex => self.value.clone(),
910                    #[cfg(feature = "base64")]
911                    ScalarType::Binary => self.value.clone(),
912                }
913            }
914            ScalarStyle::SingleQuoted => self.to_single_quoted(),
915            ScalarStyle::DoubleQuoted => self.to_double_quoted(),
916            ScalarStyle::Literal => self.to_literal(),
917            ScalarStyle::Folded => self.to_folded(),
918        };
919
920        format!("{}{}", tag_prefix, content)
921    }
922
923    /// Convert to single-quoted string
924    fn to_single_quoted(&self) -> String {
925        // Escape single quotes by doubling them
926        let escaped = self.value.replace('\'', "''");
927        format!("'{}'", escaped)
928    }
929
930    /// Convert to double-quoted string
931    fn to_double_quoted(&self) -> String {
932        let mut result = String::from("\"");
933        for ch in self.value.chars() {
934            match ch {
935                '"' => result.push_str("\\\""),
936                '\\' => result.push_str("\\\\"),
937                '\n' => result.push_str("\\n"),
938                '\r' => result.push_str("\\r"),
939                '\t' => result.push_str("\\t"),
940                '\x08' => result.push_str("\\b"),
941                '\x0C' => result.push_str("\\f"),
942                '\x07' => result.push_str("\\a"), // bell
943                '\x1B' => result.push_str("\\e"), // escape
944                '\x0B' => result.push_str("\\v"), // vertical tab
945                '\0' => result.push_str("\\0"),   // null
946                c if c.is_control() || (c as u32) > 0x7F => {
947                    // Handle Unicode characters and control characters
948                    let code_point = c as u32;
949                    if code_point <= 0xFF {
950                        result.push_str(&format!("\\x{:02X}", code_point));
951                    } else if code_point <= 0xFFFF {
952                        result.push_str(&format!("\\u{:04X}", code_point));
953                    } else {
954                        result.push_str(&format!("\\U{:08X}", code_point));
955                    }
956                }
957                c => result.push(c),
958            }
959        }
960        result.push('"');
961        result
962    }
963
964    /// Convert to literal block scalar
965    fn to_literal(&self) -> String {
966        self.to_literal_with_indent(2)
967    }
968
969    /// Convert to folded block scalar
970    fn to_folded(&self) -> String {
971        self.to_folded_with_indent(2)
972    }
973
974    /// Convert to literal block scalar with specific indentation
975    pub fn to_literal_with_indent(&self, indent: usize) -> String {
976        let indent_str = " ".repeat(indent);
977
978        // Detect the existing indentation of the content
979        let existing_indent = self.detect_content_indentation();
980
981        // If content already has consistent indentation, preserve it
982        if existing_indent.is_some() {
983            format!("|\n{}", self.value)
984        } else {
985            // Add consistent indentation
986            let indented = self
987                .value
988                .lines()
989                .map(|line| {
990                    if line.trim().is_empty() {
991                        String::new()
992                    } else {
993                        format!("{}{}", indent_str, line)
994                    }
995                })
996                .collect::<Vec<_>>()
997                .join("\n");
998            format!("|\n{}", indented)
999        }
1000    }
1001
1002    /// Convert to folded block scalar with specific indentation
1003    pub fn to_folded_with_indent(&self, indent: usize) -> String {
1004        let indent_str = " ".repeat(indent);
1005
1006        // Detect the existing indentation of the content
1007        let existing_indent = self.detect_content_indentation();
1008
1009        // If content already has consistent indentation, preserve it
1010        if existing_indent.is_some() {
1011            format!(">\n{}", self.value)
1012        } else {
1013            // Add consistent indentation
1014            let indented = self
1015                .value
1016                .lines()
1017                .map(|line| {
1018                    if line.trim().is_empty() {
1019                        String::new()
1020                    } else {
1021                        format!("{}{}", indent_str, line)
1022                    }
1023                })
1024                .collect::<Vec<_>>()
1025                .join("\n");
1026            format!(">\n{}", indented)
1027        }
1028    }
1029
1030    /// Detect the minimum indentation level of non-empty lines in the content
1031    fn detect_content_indentation(&self) -> Option<usize> {
1032        let non_empty_lines: Vec<&str> = self
1033            .value
1034            .lines()
1035            .filter(|line| !line.trim().is_empty())
1036            .collect();
1037
1038        if non_empty_lines.is_empty() {
1039            return None;
1040        }
1041
1042        let mut min_indent = None;
1043        let mut all_have_same_indent = true;
1044
1045        for line in non_empty_lines {
1046            let indent = line.len() - line.trim_start().len();
1047            match min_indent {
1048                None => min_indent = Some(indent),
1049                Some(current_min) => {
1050                    if indent != current_min {
1051                        all_have_same_indent = false;
1052                    }
1053                    min_indent = Some(current_min.min(indent));
1054                }
1055            }
1056        }
1057
1058        // Only preserve indentation if all lines have some consistent structure
1059        if all_have_same_indent && min_indent.unwrap_or(0) > 0 {
1060            min_indent
1061        } else {
1062            None
1063        }
1064    }
1065}
1066
1067impl fmt::Display for ScalarValue {
1068    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1069        write!(f, "{}", self.to_yaml_string())
1070    }
1071}
1072
1073impl From<String> for ScalarValue {
1074    fn from(value: String) -> Self {
1075        Self::string(value)
1076    }
1077}
1078
1079impl From<&str> for ScalarValue {
1080    fn from(value: &str) -> Self {
1081        Self::string(value)
1082    }
1083}
1084
1085impl From<i32> for ScalarValue {
1086    fn from(value: i32) -> Self {
1087        Self {
1088            value: value.to_string(),
1089            style: ScalarStyle::Plain,
1090            scalar_type: ScalarType::Integer,
1091        }
1092    }
1093}
1094
1095impl From<i64> for ScalarValue {
1096    fn from(value: i64) -> Self {
1097        Self {
1098            value: value.to_string(),
1099            style: ScalarStyle::Plain,
1100            scalar_type: ScalarType::Integer,
1101        }
1102    }
1103}
1104
1105impl From<f32> for ScalarValue {
1106    fn from(value: f32) -> Self {
1107        Self {
1108            value: value.to_string(),
1109            style: ScalarStyle::Plain,
1110            scalar_type: ScalarType::Float,
1111        }
1112    }
1113}
1114
1115impl From<f64> for ScalarValue {
1116    fn from(value: f64) -> Self {
1117        Self {
1118            value: value.to_string(),
1119            style: ScalarStyle::Plain,
1120            scalar_type: ScalarType::Float,
1121        }
1122    }
1123}
1124
1125impl From<bool> for ScalarValue {
1126    fn from(value: bool) -> Self {
1127        Self {
1128            value: if value { "true" } else { "false" }.to_string(),
1129            style: ScalarStyle::Plain,
1130            scalar_type: ScalarType::Boolean,
1131        }
1132    }
1133}
1134
1135impl From<crate::yaml::Scalar> for ScalarValue {
1136    fn from(scalar: crate::yaml::Scalar) -> Self {
1137        let value = scalar.as_string();
1138        ScalarValue::parse(&value)
1139    }
1140}
1141
1142impl crate::AsYaml for ScalarValue {
1143    fn as_node(&self) -> Option<&crate::yaml::SyntaxNode> {
1144        None
1145    }
1146
1147    fn kind(&self) -> crate::as_yaml::YamlKind {
1148        crate::as_yaml::YamlKind::Scalar
1149    }
1150
1151    fn build_content(
1152        &self,
1153        builder: &mut rowan::GreenNodeBuilder,
1154        _indent: usize,
1155        _flow_context: bool,
1156    ) -> bool {
1157        use crate::lex::SyntaxKind;
1158        let token_kind = match self.scalar_type() {
1159            ScalarType::Integer => SyntaxKind::INT,
1160            ScalarType::Float => SyntaxKind::FLOAT,
1161            ScalarType::Boolean => SyntaxKind::BOOL,
1162            ScalarType::Null => SyntaxKind::NULL,
1163            _ => SyntaxKind::STRING,
1164        };
1165        builder.start_node(SyntaxKind::SCALAR.into());
1166        builder.token(token_kind.into(), self.value());
1167        builder.finish_node();
1168        false
1169    }
1170
1171    fn is_inline(&self) -> bool {
1172        true
1173    }
1174}
1175
1176#[cfg(test)]
1177mod tests {
1178    use super::*;
1179
1180    #[test]
1181    fn test_plain_scalars() {
1182        let scalar = ScalarValue::string("simple");
1183        assert_eq!(scalar.to_yaml_string(), "simple");
1184
1185        let scalar = ScalarValue::string("hello world");
1186        assert_eq!(scalar.to_yaml_string(), "hello world");
1187    }
1188
1189    #[test]
1190    fn test_values_needing_quotes() {
1191        // Boolean-like values
1192        let scalar = ScalarValue::string("true");
1193        assert_eq!(scalar.to_yaml_string(), "'true'");
1194
1195        let scalar = ScalarValue::string("false");
1196        assert_eq!(scalar.to_yaml_string(), "'false'");
1197
1198        let scalar = ScalarValue::string("yes");
1199        assert_eq!(scalar.to_yaml_string(), "'yes'");
1200
1201        let scalar = ScalarValue::string("no");
1202        assert_eq!(scalar.to_yaml_string(), "'no'");
1203
1204        // Null-like values
1205        let scalar = ScalarValue::string("null");
1206        assert_eq!(scalar.to_yaml_string(), "'null'");
1207
1208        let scalar = ScalarValue::string("~");
1209        assert_eq!(scalar.to_yaml_string(), "'~'");
1210
1211        // Numbers
1212        let scalar = ScalarValue::string("123");
1213        assert_eq!(scalar.to_yaml_string(), "'123'");
1214
1215        let scalar = ScalarValue::string("3.14");
1216        assert_eq!(scalar.to_yaml_string(), "'3.14'");
1217
1218        // Special characters
1219        let scalar = ScalarValue::string("value: something");
1220        assert_eq!(scalar.to_yaml_string(), "'value: something'");
1221
1222        let scalar = ScalarValue::string("# comment");
1223        assert_eq!(scalar.to_yaml_string(), "'# comment'");
1224
1225        // Leading/trailing whitespace
1226        let scalar = ScalarValue::string("  spaces  ");
1227        assert_eq!(scalar.to_yaml_string(), "'  spaces  '");
1228    }
1229
1230    #[test]
1231    fn test_single_quoted() {
1232        let scalar = ScalarValue::single_quoted("value with 'quotes'");
1233        assert_eq!(scalar.to_yaml_string(), "'value with ''quotes'''");
1234    }
1235
1236    #[test]
1237    fn test_double_quoted() {
1238        let scalar = ScalarValue::double_quoted("value with \"quotes\" and \\backslash");
1239        assert_eq!(
1240            scalar.to_yaml_string(),
1241            "\"value with \\\"quotes\\\" and \\\\backslash\""
1242        );
1243
1244        let scalar = ScalarValue::double_quoted("line1\nline2\ttab");
1245        assert_eq!(scalar.to_yaml_string(), "\"line1\\nline2\\ttab\"");
1246    }
1247
1248    #[test]
1249    fn test_multiline() {
1250        let scalar = ScalarValue::string("line1\nline2\nline3");
1251        // Should auto-detect literal style for multiline
1252        assert_eq!(scalar.style(), ScalarStyle::Literal);
1253    }
1254
1255    #[test]
1256    fn test_from_types() {
1257        let scalar = ScalarValue::from(42);
1258        assert_eq!(scalar.to_yaml_string(), "42");
1259
1260        let scalar = ScalarValue::from(1.234);
1261        assert_eq!(scalar.to_yaml_string(), "1.234");
1262
1263        let scalar = ScalarValue::from(true);
1264        assert_eq!(scalar.to_yaml_string(), "true");
1265
1266        let scalar = ScalarValue::from(false);
1267        assert_eq!(scalar.to_yaml_string(), "false");
1268    }
1269
1270    #[test]
1271    fn test_empty_string() {
1272        let scalar = ScalarValue::string("");
1273        assert_eq!(scalar.to_yaml_string(), "''");
1274    }
1275
1276    #[test]
1277    fn test_special_start_chars() {
1278        let scalar = ScalarValue::string("-item");
1279        assert_eq!(scalar.to_yaml_string(), "'-item'");
1280
1281        let scalar = ScalarValue::string("?key");
1282        assert_eq!(scalar.to_yaml_string(), "'?key'");
1283
1284        let scalar = ScalarValue::string("[array]");
1285        assert_eq!(scalar.to_yaml_string(), "'[array]'");
1286    }
1287
1288    #[test]
1289    fn test_null_scalar() {
1290        let scalar = ScalarValue::null();
1291        assert_eq!(scalar.to_yaml_string(), "null");
1292        assert_eq!(scalar.scalar_type, ScalarType::Null);
1293    }
1294
1295    #[test]
1296    fn test_escape_sequences_basic() {
1297        // Test basic escape sequences
1298        assert_eq!(
1299            ScalarValue::parse_escape_sequences("hello\\nworld"),
1300            "hello\nworld"
1301        );
1302        assert_eq!(
1303            ScalarValue::parse_escape_sequences("tab\\there"),
1304            "tab\there"
1305        );
1306        assert_eq!(
1307            ScalarValue::parse_escape_sequences("quote\\\"test"),
1308            "quote\"test"
1309        );
1310        assert_eq!(
1311            ScalarValue::parse_escape_sequences("back\\\\slash"),
1312            "back\\slash"
1313        );
1314        assert_eq!(
1315            ScalarValue::parse_escape_sequences("return\\rtest"),
1316            "return\rtest"
1317        );
1318    }
1319
1320    #[test]
1321    fn test_escape_sequences_control_chars() {
1322        // Test control character escapes
1323        assert_eq!(ScalarValue::parse_escape_sequences("bell\\a"), "bell\x07");
1324        assert_eq!(
1325            ScalarValue::parse_escape_sequences("backspace\\b"),
1326            "backspace\x08"
1327        );
1328        assert_eq!(
1329            ScalarValue::parse_escape_sequences("formfeed\\f"),
1330            "formfeed\x0C"
1331        );
1332        assert_eq!(
1333            ScalarValue::parse_escape_sequences("escape\\e"),
1334            "escape\x1B"
1335        );
1336        assert_eq!(ScalarValue::parse_escape_sequences("vtab\\v"), "vtab\x0B");
1337        assert_eq!(ScalarValue::parse_escape_sequences("null\\0"), "null\0");
1338        assert_eq!(ScalarValue::parse_escape_sequences("slash\\/"), "slash/");
1339    }
1340
    // `\xNN` escapes decode to the character with that 8-bit code point;
    // malformed or truncated sequences are left untouched.
    #[test]
    fn test_escape_sequences_unicode_x() {
        // Test \xNN escape sequences
        assert_eq!(ScalarValue::parse_escape_sequences("\\x41"), "A"); // 0x41 = 'A'
        assert_eq!(ScalarValue::parse_escape_sequences("\\x7A"), "z"); // 0x7A = 'z'
        assert_eq!(ScalarValue::parse_escape_sequences("\\x20"), " "); // 0x20 = space
        assert_eq!(ScalarValue::parse_escape_sequences("\\xFF"), "\u{FF}"); // 0xFF = ÿ

        // Test invalid hex sequences
        assert_eq!(ScalarValue::parse_escape_sequences("\\xGH"), "\\xGH"); // Invalid hex
        assert_eq!(ScalarValue::parse_escape_sequences("\\x4"), "\\x4"); // Incomplete
    }
1353
1354    #[test]
1355    fn test_escape_sequences_unicode_u() {
1356        // Test \uNNNN escape sequences
1357        assert_eq!(ScalarValue::parse_escape_sequences("\\u0041"), "A"); // 0x0041 = 'A'
1358        assert_eq!(ScalarValue::parse_escape_sequences("\\u03B1"), "Ξ±"); // Greek alpha
1359        assert_eq!(ScalarValue::parse_escape_sequences("\\u2603"), "β˜ƒ"); // Snowman
1360        assert_eq!(ScalarValue::parse_escape_sequences("\\u4E2D"), "δΈ­"); // Chinese character
1361
1362        // Test invalid sequences
1363        assert_eq!(ScalarValue::parse_escape_sequences("\\uGHIJ"), "\\uGHIJ"); // Invalid hex
1364        assert_eq!(ScalarValue::parse_escape_sequences("\\u041"), "\\u041"); // Incomplete
1365    }
1366
1367    #[test]
1368    fn test_escape_sequences_unicode_capital_u() {
1369        // Test \UNNNNNNNN escape sequences
1370        assert_eq!(ScalarValue::parse_escape_sequences("\\U00000041"), "A"); // 0x00000041 = 'A'
1371        assert_eq!(ScalarValue::parse_escape_sequences("\\U0001F603"), "πŸ˜ƒ"); // Smiley emoji
1372        assert_eq!(ScalarValue::parse_escape_sequences("\\U0001F4A9"), "πŸ’©"); // Pile of poo emoji
1373
1374        // Test invalid sequences
1375        assert_eq!(
1376            ScalarValue::parse_escape_sequences("\\UGHIJKLMN"),
1377            "\\UGHIJKLMN"
1378        ); // Invalid hex
1379        assert_eq!(
1380            ScalarValue::parse_escape_sequences("\\U0000004"),
1381            "\\U0000004"
1382        ); // Incomplete
1383        assert_eq!(
1384            ScalarValue::parse_escape_sequences("\\UFFFFFFFF"),
1385            "\\UFFFFFFFF"
1386        ); // Invalid code point
1387    }
1388
1389    #[test]
1390    fn test_escape_sequences_line_folding() {
1391        // Test line folding with escaped spaces and newlines
1392        assert_eq!(
1393            ScalarValue::parse_escape_sequences("line\\ \nfolding"),
1394            "linefolding"
1395        );
1396        assert_eq!(
1397            ScalarValue::parse_escape_sequences("escaped\\nline\\nbreak"),
1398            "escaped\nline\nbreak"
1399        );
1400        assert_eq!(
1401            ScalarValue::parse_escape_sequences("remove\\\nline\\nbreak"),
1402            "removeline\nbreak"
1403        );
1404    }
1405
1406    #[test]
1407    fn test_escape_sequences_mixed() {
1408        // Test mixed escape sequences
1409        let input = "Hello\\nWorld\\u0021\\x20\\U0001F44D";
1410        let expected = "Hello\nWorld! πŸ‘";
1411        assert_eq!(ScalarValue::parse_escape_sequences(input), expected);
1412
1413        // Test with quotes and backslashes
1414        let input = "Quote\\\"back\\\\slash\\ttab";
1415        let expected = "Quote\"back\\slash\ttab";
1416        assert_eq!(ScalarValue::parse_escape_sequences(input), expected);
1417    }
1418
1419    #[test]
1420    fn test_escape_sequences_unknown() {
1421        // Test unknown escape sequences are preserved
1422        assert_eq!(ScalarValue::parse_escape_sequences("\\q"), "\\q");
1423        assert_eq!(ScalarValue::parse_escape_sequences("\\z"), "\\z");
1424        assert_eq!(ScalarValue::parse_escape_sequences("\\1"), "\\1");
1425    }
1426
    // Relative indentation between lines survives re-indenting.
    #[test]
    fn test_indentation_preservation() {
        // Test preserving exact indentation in block scalars
        let content_with_indent = "  Line 1\n    Line 2 more indented\n  Line 3";
        let scalar = ScalarValue::literal(content_with_indent);

        // The lines are indented by 2, 4 and 2 spaces, so the content is not
        // uniformly indented: the requested 2 spaces are added in front of
        // every line, keeping the relative indentation between lines intact.
        let yaml_output = scalar.to_literal_with_indent(2);
        assert_eq!(
            yaml_output,
            "|\n    Line 1\n      Line 2 more indented\n    Line 3"
        );
    }
1440
    // `detect_content_indentation` reports a width only for uniform, non-zero
    // indentation.
    #[test]
    fn test_indentation_detection() {
        // Test content with consistent indentation
        let consistent_content = "  Line 1\n  Line 2\n  Line 3";
        let scalar1 = ScalarValue::literal(consistent_content);
        assert_eq!(scalar1.detect_content_indentation(), Some(2));

        // Test content with no indentation
        let no_indent_content = "Line 1\nLine 2\nLine 3";
        let scalar2 = ScalarValue::literal(no_indent_content);
        assert_eq!(scalar2.detect_content_indentation(), None);

        // Test content with inconsistent indentation
        let inconsistent_content = "  Line 1\n    Line 2\n Line 3";
        let scalar3 = ScalarValue::literal(inconsistent_content);
        assert_eq!(scalar3.detect_content_indentation(), None);

        // Test empty content
        let empty_content = "";
        let scalar4 = ScalarValue::literal(empty_content);
        assert_eq!(scalar4.detect_content_indentation(), None);

        // Test that an empty line between indented lines is ignored
        let whitespace_content = "  Line 1\n\n  Line 3";
        let scalar5 = ScalarValue::literal(whitespace_content);
        assert_eq!(scalar5.detect_content_indentation(), Some(2));
    }
1468
1469    #[test]
1470    fn test_literal_with_custom_indent() {
1471        // Test applying custom indentation to unindented content
1472        let content = "Line 1\nLine 2\nLine 3";
1473        let scalar = ScalarValue::literal(content);
1474
1475        let yaml_4_spaces = scalar.to_literal_with_indent(4);
1476        assert_eq!(yaml_4_spaces, "|\n    Line 1\n    Line 2\n    Line 3");
1477
1478        let yaml_1_space = scalar.to_literal_with_indent(1);
1479        assert_eq!(yaml_1_space, "|\n Line 1\n Line 2\n Line 3");
1480    }
1481
1482    #[test]
1483    fn test_folded_with_custom_indent() {
1484        // Test applying custom indentation to folded scalars
1485        let content = "Line 1\nLine 2\nLine 3";
1486        let scalar = ScalarValue::folded(content);
1487
1488        let yaml_3_spaces = scalar.to_folded_with_indent(3);
1489        assert_eq!(yaml_3_spaces, ">\n   Line 1\n   Line 2\n   Line 3");
1490    }
1491
1492    #[test]
1493    fn test_mixed_empty_lines_preservation() {
1494        // Test handling of empty lines in block scalars
1495        let content_with_empty_lines = "Line 1\n\nLine 3\n\n\nLine 6";
1496        let scalar = ScalarValue::literal(content_with_empty_lines);
1497
1498        let yaml_output = scalar.to_literal_with_indent(2);
1499        assert_eq!(yaml_output, "|\n  Line 1\n\n  Line 3\n\n\n  Line 6");
1500
1501        // Empty lines should remain empty (no indentation added)
1502        // Input has 3 empty lines; they should appear unchanged in the output
1503        let lines: Vec<&str> = yaml_output.lines().collect();
1504        let empty_line_count = lines.iter().filter(|line| line.is_empty()).count();
1505        assert_eq!(empty_line_count, 3);
1506    }
1507
1508    #[test]
1509    fn test_escape_sequences_edge_cases() {
1510        // Test edge cases
1511        assert_eq!(ScalarValue::parse_escape_sequences(""), "");
1512        assert_eq!(ScalarValue::parse_escape_sequences("\\"), "\\");
1513        assert_eq!(
1514            ScalarValue::parse_escape_sequences("no escapes"),
1515            "no escapes"
1516        );
1517        assert_eq!(ScalarValue::parse_escape_sequences("\\\\\\\\"), "\\\\");
1518    }
1519
1520    #[test]
1521    fn test_double_quoted_with_escapes() {
1522        // Test that double-quoted scalars properly escape and unescape
1523        let original = "Hello\nWorld\tπŸ˜ƒ";
1524        let scalar = ScalarValue::double_quoted(original);
1525        let yaml_string = scalar.to_yaml_string();
1526
1527        // Should contain escaped sequences
1528        assert_eq!(yaml_string, "\"Hello\\nWorld\\t\\U0001F603\"");
1529
1530        // Parse it back
1531        let parsed = ScalarValue::parse_escape_sequences(&yaml_string[1..yaml_string.len() - 1]);
1532        assert_eq!(parsed, original);
1533    }
1534
1535    #[test]
1536    fn test_unicode_output_formatting() {
1537        // Test that Unicode characters are properly formatted in output
1538        let scalar = ScalarValue::double_quoted("Hello δΈ–η•Œ 🌍");
1539        let yaml_string = scalar.to_yaml_string();
1540
1541        // Should escape non-ASCII characters
1542        assert_eq!(yaml_string, "\"Hello \\u4E16\\u754C \\U0001F30D\"");
1543
1544        // But the internal value should remain unchanged
1545        assert_eq!(scalar.value(), "Hello δΈ–η•Œ 🌍");
1546    }
1547
1548    #[test]
1549    #[cfg(feature = "base64")]
1550    fn test_binary_data_encoding() {
1551        // Test creating binary scalar from raw bytes
1552        let data = b"Hello, World!";
1553        let scalar = ScalarValue::binary(data);
1554
1555        assert!(scalar.is_binary());
1556        assert_eq!(scalar.scalar_type(), ScalarType::Binary);
1557
1558        // Should produce valid base64
1559        let yaml_output = scalar.to_yaml_string();
1560        assert!(yaml_output.starts_with("!!binary "));
1561
1562        // Should be able to decode back to original data
1563        if let Some(decoded_result) = scalar.as_binary() {
1564            let decoded = decoded_result.expect("Should decode successfully");
1565            assert_eq!(decoded, data);
1566        } else {
1567            panic!("Should be able to extract binary data");
1568        }
1569    }
1570
1571    #[test]
1572    #[cfg(feature = "base64")]
1573    fn test_base64_encoding_decoding() {
1574        // Test various byte sequences
1575        let test_cases = [
1576            b"".as_slice(),
1577            b"A",
1578            b"AB",
1579            b"ABC",
1580            b"ABCD",
1581            b"Hello, World!",
1582            &[0, 1, 2, 3, 255, 254, 253],
1583        ];
1584
1585        for data in test_cases {
1586            let encoded = base64_encode(data);
1587            let decoded = base64_decode(&encoded).expect("Should decode successfully");
1588            assert_eq!(decoded, data, "Failed for data: {:?}", data);
1589        }
1590    }
1591
1592    #[test]
1593    fn test_timestamp_creation_and_validation() {
1594        // Test various timestamp formats
1595        let valid_timestamps = [
1596            "2023-12-25",
1597            "2023-12-25T10:30:45",
1598            "2023-12-25 10:30:45",
1599            "2023-12-25T10:30:45Z",
1600            "2001-12-14 21:59:43.10 -5", // Space-separated with timezone
1601            "2001-12-15T02:59:43.1Z",    // ISO 8601
1602            "2001-12-14t21:59:43.10-05:00", // Lowercase t
1603        ];
1604
1605        for ts in valid_timestamps {
1606            let scalar = ScalarValue::timestamp(ts);
1607            assert!(scalar.is_timestamp());
1608            assert_eq!(scalar.scalar_type(), ScalarType::Timestamp);
1609            assert_eq!(scalar.value(), ts);
1610
1611            let yaml_output = scalar.to_yaml_string();
1612            assert_eq!(yaml_output, format!("!!timestamp {}", ts));
1613
1614            // Test auto-detection recognizes it as timestamp
1615            let auto_scalar = ScalarValue::parse(ts);
1616            assert_eq!(
1617                auto_scalar.scalar_type(),
1618                ScalarType::Timestamp,
1619                "Failed to auto-detect '{}' as timestamp",
1620                ts
1621            );
1622        }
1623
1624        // Test invalid timestamps are not recognized
1625        let invalid_timestamps = [
1626            "not-a-date",
1627            "2023-13-01", // Invalid month
1628            "2023-12-32", // Invalid day
1629            "12:34:56",   // Time only (should be String)
1630            "2023/12/25", // Wrong separator
1631        ];
1632
1633        for ts in invalid_timestamps {
1634            let auto_scalar = ScalarValue::parse(ts);
1635            assert_ne!(
1636                auto_scalar.scalar_type(),
1637                ScalarType::Timestamp,
1638                "'{}' should not be detected as timestamp",
1639                ts
1640            );
1641        }
1642    }
1643
1644    #[test]
1645    fn test_regex_creation() {
1646        let pattern = r"^\d{3}-\d{2}-\d{4}$";
1647        let scalar = ScalarValue::regex(pattern);
1648
1649        assert!(scalar.is_regex());
1650        assert_eq!(scalar.scalar_type(), ScalarType::Regex);
1651        assert_eq!(scalar.value(), pattern);
1652
1653        let yaml_output = scalar.to_yaml_string();
1654        assert_eq!(yaml_output, format!("!!regex {}", pattern));
1655    }
1656
1657    #[test]
1658    fn test_regex_edge_cases() {
1659        // Test empty pattern
1660        let empty_regex = ScalarValue::regex("");
1661        assert!(empty_regex.is_regex());
1662        assert_eq!(empty_regex.value(), "");
1663        assert_eq!(empty_regex.to_yaml_string(), "!!regex ");
1664
1665        // Test pattern with special characters
1666        let special_chars = ScalarValue::regex(r"[.*+?^${}()|[\]\\]");
1667        assert!(special_chars.is_regex());
1668        assert_eq!(special_chars.value(), r"[.*+?^${}()|[\]\\]");
1669
1670        // Test unicode patterns
1671        let unicode_regex = ScalarValue::regex(r"\p{L}+");
1672        assert!(unicode_regex.is_regex());
1673        assert_eq!(unicode_regex.value(), r"\p{L}+");
1674
1675        // Test very long pattern
1676        let long_pattern = "a".repeat(1000);
1677        let long_regex = ScalarValue::regex(&long_pattern);
1678        assert!(long_regex.is_regex());
1679        assert_eq!(long_regex.value(), long_pattern);
1680
1681        // Test pattern with quotes and escapes
1682        let quoted_regex = ScalarValue::regex(r#"'quoted' and "double quoted" with \\ backslash"#);
1683        assert!(quoted_regex.is_regex());
1684        assert_eq!(
1685            quoted_regex.value(),
1686            r#"'quoted' and "double quoted" with \\ backslash"#
1687        );
1688    }
1689
1690    #[test]
1691    fn test_regex_type_coercion() {
1692        let regex_scalar = ScalarValue::regex(r"\d+");
1693
1694        // Test coercing regex to string
1695        let string_scalar = regex_scalar.coerce_to_type(ScalarType::String).unwrap();
1696        assert_eq!(string_scalar.scalar_type(), ScalarType::String);
1697        assert_eq!(string_scalar.value(), r"\d+");
1698        assert!(!string_scalar.is_regex());
1699
1700        // Test coercing string to regex
1701        let str_scalar = ScalarValue::string("test.*");
1702        let regex_from_string = str_scalar.coerce_to_type(ScalarType::Regex).unwrap();
1703        assert_eq!(regex_from_string.scalar_type(), ScalarType::Regex);
1704        assert_eq!(regex_from_string.value(), "test.*");
1705        assert!(regex_from_string.is_regex());
1706
1707        // Test that regex cannot be coerced to number types
1708        assert!(regex_scalar.coerce_to_type(ScalarType::Integer).is_none());
1709        assert!(regex_scalar.coerce_to_type(ScalarType::Float).is_none());
1710        assert!(regex_scalar.coerce_to_type(ScalarType::Boolean).is_none());
1711    }
1712
1713    #[test]
1714    #[cfg(feature = "regex")]
1715    fn test_regex_compilation() {
1716        // Test as_regex() with a regex scalar
1717        let regex_scalar = ScalarValue::regex(r"\d{3}-\d{4}");
1718        let compiled = regex_scalar.as_regex().unwrap();
1719        assert!(compiled.is_match("555-1234"));
1720        assert!(!compiled.is_match("not-a-phone"));
1721
1722        // Test as_regex() with a non-regex scalar returns None
1723        let string_scalar = ScalarValue::string("not a regex");
1724        assert!(string_scalar.as_regex().is_none());
1725
1726        // Test try_as_regex() with any scalar type
1727        let pattern_scalar = ScalarValue::string(r"^\w+@\w+\.\w+$");
1728        let email_regex = pattern_scalar.try_as_regex().unwrap();
1729        assert!(email_regex.is_match("test@example.com"));
1730        assert!(!email_regex.is_match("not-an-email"));
1731
1732        // Test with invalid regex pattern
1733        let invalid_scalar = ScalarValue::regex(r"[invalid(");
1734        assert!(invalid_scalar.as_regex().is_none());
1735
1736        // Test try_as_regex() with invalid pattern returns error
1737        let invalid_pattern = ScalarValue::string(r"[invalid(");
1738        assert!(invalid_pattern.try_as_regex().is_err());
1739    }
1740
1741    #[test]
1742    #[cfg(feature = "regex")]
1743    fn test_regex_extraction_use_cases() {
1744        // Test extracting and using regex for validation
1745        let validation_rules = [
1746            ScalarValue::regex(r"^\d{5}$"),                 // ZIP code
1747            ScalarValue::regex(r"^[A-Z]{2}$"),              // State code
1748            ScalarValue::regex(r"^\(\d{3}\) \d{3}-\d{4}$"), // Phone number
1749        ];
1750
1751        let test_values = ["12345", "CA", "(555) 123-4567"];
1752
1753        for (rule, value) in validation_rules.iter().zip(test_values.iter()) {
1754            let regex = rule.as_regex().unwrap();
1755            assert!(regex.is_match(value), "Pattern should match {}", value);
1756        }
1757
1758        // Test with complex regex patterns
1759        let email_regex = ScalarValue::regex(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$");
1760        let compiled = email_regex.as_regex().unwrap();
1761        assert!(compiled.is_match("user@example.com"));
1762        assert!(compiled.is_match("test.user+tag@sub.domain.org"));
1763        assert!(!compiled.is_match("invalid.email"));
1764
1765        // Test extracting capture groups
1766        let version_regex = ScalarValue::regex(r"^v(\d+)\.(\d+)\.(\d+)$");
1767        let compiled = version_regex.as_regex().unwrap();
1768        if let Some(captures) = compiled.captures("v1.2.3") {
1769            assert_eq!(captures.get(1).unwrap().as_str(), "1");
1770            assert_eq!(captures.get(2).unwrap().as_str(), "2");
1771            assert_eq!(captures.get(3).unwrap().as_str(), "3");
1772        } else {
1773            panic!("Should have matched version string");
1774        }
1775    }
1776
1777    #[test]
1778    fn test_type_coercion() {
1779        // Test coercing string to integer
1780        let str_scalar = ScalarValue::string("42");
1781        let int_scalar = str_scalar.coerce_to_type(ScalarType::Integer).unwrap();
1782        assert_eq!(int_scalar.scalar_type(), ScalarType::Integer);
1783        assert_eq!(int_scalar.value(), "42");
1784
1785        // Test coercing string to boolean
1786        let bool_scalar = ScalarValue::string("true")
1787            .coerce_to_type(ScalarType::Boolean)
1788            .unwrap();
1789        assert_eq!(bool_scalar.scalar_type(), ScalarType::Boolean);
1790        assert_eq!(bool_scalar.value(), "true");
1791
1792        // Test coercing boolean string variations
1793        let yes_scalar = ScalarValue::string("yes")
1794            .coerce_to_type(ScalarType::Boolean)
1795            .unwrap();
1796        assert_eq!(yes_scalar.value(), "true");
1797
1798        let no_scalar = ScalarValue::string("no")
1799            .coerce_to_type(ScalarType::Boolean)
1800            .unwrap();
1801        assert_eq!(no_scalar.value(), "false");
1802
1803        // Test failed coercion
1804        let str_scalar = ScalarValue::string("not_a_number");
1805        assert!(str_scalar.coerce_to_type(ScalarType::Integer).is_none());
1806    }
1807
1808    #[test]
1809    fn test_auto_type_detection() {
1810        // Test various automatic type detections
1811        assert_eq!(ScalarValue::auto_detect_type("42"), ScalarType::Integer);
1812        assert_eq!(ScalarValue::auto_detect_type("3.14"), ScalarType::Float);
1813        assert_eq!(ScalarValue::auto_detect_type("true"), ScalarType::Boolean);
1814        assert_eq!(ScalarValue::auto_detect_type("false"), ScalarType::Boolean);
1815        assert_eq!(ScalarValue::auto_detect_type("yes"), ScalarType::Boolean);
1816        assert_eq!(ScalarValue::auto_detect_type("null"), ScalarType::Null);
1817        assert_eq!(ScalarValue::auto_detect_type("~"), ScalarType::Null);
1818        assert_eq!(ScalarValue::auto_detect_type(""), ScalarType::Null);
1819        assert_eq!(
1820            ScalarValue::auto_detect_type("2023-12-25"),
1821            ScalarType::Timestamp
1822        );
1823        assert_eq!(
1824            ScalarValue::auto_detect_type("2023-12-25T10:30:45"),
1825            ScalarType::Timestamp
1826        );
1827        #[cfg(feature = "base64")]
1828        assert_eq!(
1829            ScalarValue::auto_detect_type("SGVsbG8gV29ybGQ="),
1830            ScalarType::Binary
1831        );
1832        #[cfg(not(feature = "base64"))]
1833        assert_eq!(
1834            ScalarValue::auto_detect_type("SGVsbG8gV29ybGQ="),
1835            ScalarType::String
1836        );
1837        assert_eq!(
1838            ScalarValue::auto_detect_type("hello world"),
1839            ScalarType::String
1840        );
1841    }
1842
1843    #[test]
1844    fn test_from_yaml_scalar_creation() {
1845        let int_scalar = ScalarValue::parse("123");
1846        assert_eq!(int_scalar.scalar_type(), ScalarType::Integer);
1847
1848        let bool_scalar = ScalarValue::parse("true");
1849        assert_eq!(bool_scalar.scalar_type(), ScalarType::Boolean);
1850
1851        let timestamp_scalar = ScalarValue::parse("2023-12-25");
1852        assert_eq!(timestamp_scalar.scalar_type(), ScalarType::Timestamp);
1853
1854        let string_scalar = ScalarValue::parse("hello world");
1855        assert_eq!(string_scalar.scalar_type(), ScalarType::String);
1856    }
1857
1858    #[test]
1859    fn test_timestamp_pattern_matching() {
1860        // Valid patterns
1861        assert!(ScalarValue::matches_iso8601_pattern("2023-12-25"));
1862        assert!(ScalarValue::matches_iso8601_pattern("2023-12-25T10:30:45"));
1863        assert!(ScalarValue::matches_iso8601_pattern("2023-12-25t10:30:45")); // Lowercase t
1864        assert!(ScalarValue::matches_iso8601_pattern("2023-12-25 10:30:45"));
1865        assert!(ScalarValue::matches_iso8601_pattern("2023-01-01T00:00:00"));
1866        assert!(ScalarValue::matches_iso8601_pattern(
1867            "2001-12-14t21:59:43.10-05:00"
1868        )); // Complex with lowercase t
1869
1870        // Invalid patterns
1871        assert!(!ScalarValue::matches_iso8601_pattern("2023-13-25")); // Invalid month
1872        assert!(!ScalarValue::matches_iso8601_pattern("23-12-25")); // Wrong year format
1873        assert!(!ScalarValue::matches_iso8601_pattern("2023/12/25")); // Wrong separator
1874        assert!(!ScalarValue::matches_iso8601_pattern("not-a-date"));
1875        assert!(!ScalarValue::matches_iso8601_pattern("2023"));
1876    }
1877
1878    #[test]
1879    #[cfg(feature = "base64")]
1880    fn test_base64_detection() {
1881        // Valid base64 strings
1882        assert!(ScalarValue::looks_like_base64("SGVsbG8=")); // "Hello"
1883        assert!(ScalarValue::looks_like_base64("V29ybGQ=")); // "World"
1884        assert!(ScalarValue::looks_like_base64("SGVsbG8gV29ybGQ=")); // "Hello World"
1885        assert!(ScalarValue::looks_like_base64("AAAA")); // All A's
1886
1887        // Invalid base64 strings
1888        assert!(!ScalarValue::looks_like_base64("Hello")); // No padding, wrong chars
1889        assert!(!ScalarValue::looks_like_base64("SGVsbG8")); // Missing padding (7 chars, should be 8 with padding)
1890        assert!(!ScalarValue::looks_like_base64("")); // Empty
1891        assert!(!ScalarValue::looks_like_base64("SGV@")); // Invalid character
1892        assert!(!ScalarValue::looks_like_base64("SGVsbG8g===")); // Too much padding
1893    }
1894
1895    #[test]
1896    #[cfg(feature = "base64")]
1897    fn test_binary_yaml_output_with_tags() {
1898        let data = b"Binary data here";
1899        let scalar = ScalarValue::binary(data);
1900        let yaml_output = scalar.to_yaml_string();
1901
1902        assert!(yaml_output.starts_with("!!binary "));
1903
1904        // Extract just the base64 part
1905        let base64_part = &yaml_output[9..]; // Remove "!!binary "
1906        let decoded = base64_decode(base64_part).expect("Should decode");
1907        assert_eq!(decoded, data);
1908    }
1909
1910    #[test]
1911    #[cfg(feature = "base64")]
1912    fn test_special_data_types_with_different_styles() {
1913        // Binary with different styles should still include tag
1914        let data = b"test";
1915        let binary_scalar = ScalarValue::binary(data);
1916
1917        // Even if we change style, binary type should maintain tag
1918        let mut styled_binary = binary_scalar;
1919        styled_binary.style = ScalarStyle::DoubleQuoted;
1920
1921        // The to_yaml_string should still respect the scalar type for tagging
1922        assert_eq!(styled_binary.to_yaml_string(), "!!binary \"dGVzdA==\"");
1923    }
1924
1925    #[test]
1926    fn test_type_checking_methods() {
1927        #[cfg(feature = "base64")]
1928        let binary_scalar = ScalarValue::binary(b"test");
1929        let timestamp_scalar = ScalarValue::timestamp("2023-12-25");
1930        let regex_scalar = ScalarValue::regex(r"\d+");
1931        let string_scalar = ScalarValue::string("hello");
1932
1933        // Test type checking methods
1934        #[cfg(feature = "base64")]
1935        assert!(binary_scalar.is_binary());
1936        #[cfg(feature = "base64")]
1937        assert!(!binary_scalar.is_timestamp());
1938        #[cfg(feature = "base64")]
1939        assert!(!binary_scalar.is_regex());
1940
1941        #[cfg(feature = "base64")]
1942        assert!(!timestamp_scalar.is_binary());
1943        assert!(timestamp_scalar.is_timestamp());
1944        assert!(!timestamp_scalar.is_regex());
1945
1946        #[cfg(feature = "base64")]
1947        assert!(!regex_scalar.is_binary());
1948        assert!(!regex_scalar.is_timestamp());
1949        assert!(regex_scalar.is_regex());
1950
1951        #[cfg(feature = "base64")]
1952        assert!(!string_scalar.is_binary());
1953        assert!(!string_scalar.is_timestamp());
1954        assert!(!string_scalar.is_regex());
1955    }
1956
1957    #[test]
1958    fn test_binary_number_parsing() {
1959        // Test binary number parsing (0b prefix)
1960        assert_eq!(ScalarValue::parse_integer("0b1010"), Some(10));
1961        assert_eq!(ScalarValue::parse_integer("0b11111111"), Some(255));
1962        assert_eq!(ScalarValue::parse_integer("0B101"), Some(5)); // Uppercase B
1963        assert_eq!(ScalarValue::parse_integer("-0b1010"), Some(-10));
1964        assert_eq!(ScalarValue::parse_integer("+0b101"), Some(5));
1965
1966        // Test auto-detection
1967        assert_eq!(ScalarValue::auto_detect_type("0b1010"), ScalarType::Integer);
1968        assert_eq!(
1969            ScalarValue::auto_detect_type("0B11111111"),
1970            ScalarType::Integer
1971        );
1972
1973        // Test invalid binary
1974        assert_eq!(ScalarValue::parse_integer("0b1012"), None); // Contains invalid digit
1975        assert_eq!(ScalarValue::parse_integer("0b"), None); // Empty after prefix
1976    }
1977
1978    #[test]
1979    fn test_modern_octal_number_parsing() {
1980        // Test modern octal number parsing (0o prefix)
1981        assert_eq!(ScalarValue::parse_integer("0o755"), Some(493)); // 7*64 + 5*8 + 5
1982        assert_eq!(ScalarValue::parse_integer("0o644"), Some(420)); // 6*64 + 4*8 + 4
1983        assert_eq!(ScalarValue::parse_integer("0O777"), Some(511)); // Uppercase O
1984        assert_eq!(ScalarValue::parse_integer("-0o755"), Some(-493));
1985        assert_eq!(ScalarValue::parse_integer("+0o644"), Some(420));
1986
1987        // Test auto-detection
1988        assert_eq!(ScalarValue::auto_detect_type("0o755"), ScalarType::Integer);
1989        assert_eq!(ScalarValue::auto_detect_type("0O644"), ScalarType::Integer);
1990
1991        // Test invalid octal
1992        assert_eq!(ScalarValue::parse_integer("0o789"), None); // Contains invalid digit
1993        assert_eq!(ScalarValue::parse_integer("0o"), None); // Empty after prefix
1994    }
1995
1996    #[test]
1997    fn test_legacy_octal_number_parsing() {
1998        // Test legacy octal number parsing (0 prefix)
1999        assert_eq!(ScalarValue::parse_integer("0755"), Some(493));
2000        assert_eq!(ScalarValue::parse_integer("0644"), Some(420));
2001        assert_eq!(ScalarValue::parse_integer("0777"), Some(511));
2002
2003        // Test auto-detection
2004        assert_eq!(ScalarValue::auto_detect_type("0755"), ScalarType::Integer);
2005        assert_eq!(ScalarValue::auto_detect_type("0644"), ScalarType::Integer);
2006
2007        // Test edge cases
2008        assert_eq!(ScalarValue::parse_integer("0"), Some(0)); // Single zero
2009        assert_eq!(ScalarValue::parse_integer("00"), Some(0)); // Double zero
2010
2011        // Numbers starting with 0 but containing 8 or 9 should fail as octal
2012        assert_eq!(ScalarValue::parse_integer("0789"), None);
2013        assert_eq!(ScalarValue::parse_integer("0128"), None);
2014    }
2015
2016    #[test]
2017    fn test_hexadecimal_number_parsing() {
2018        // Test hexadecimal number parsing (0x prefix) - should still work
2019        assert_eq!(ScalarValue::parse_integer("0xFF"), Some(255));
2020        assert_eq!(ScalarValue::parse_integer("0x1A"), Some(26));
2021        assert_eq!(ScalarValue::parse_integer("0XFF"), Some(255)); // Uppercase X
2022        assert_eq!(ScalarValue::parse_integer("-0xFF"), Some(-255));
2023        assert_eq!(ScalarValue::parse_integer("+0x1A"), Some(26));
2024
2025        // Test auto-detection
2026        assert_eq!(ScalarValue::auto_detect_type("0xFF"), ScalarType::Integer);
2027        assert_eq!(ScalarValue::auto_detect_type("0X1A"), ScalarType::Integer);
2028    }
2029
2030    #[test]
2031    fn test_decimal_number_parsing() {
2032        // Test decimal number parsing (no prefix) - should still work
2033        assert_eq!(ScalarValue::parse_integer("42"), Some(42));
2034        assert_eq!(ScalarValue::parse_integer("123"), Some(123));
2035        assert_eq!(ScalarValue::parse_integer("-42"), Some(-42));
2036        assert_eq!(ScalarValue::parse_integer("+123"), Some(123));
2037
2038        // Test auto-detection
2039        assert_eq!(ScalarValue::auto_detect_type("42"), ScalarType::Integer);
2040        assert_eq!(ScalarValue::auto_detect_type("-123"), ScalarType::Integer);
2041    }
2042
2043    #[test]
2044    fn test_number_format_yaml_output() {
2045        // Test that different number formats are properly detected and output
2046        let binary_scalar = ScalarValue::parse("0b1010");
2047        assert_eq!(binary_scalar.scalar_type(), ScalarType::Integer);
2048        assert_eq!(binary_scalar.value(), "0b1010");
2049
2050        let octal_scalar = ScalarValue::parse("0o755");
2051        assert_eq!(octal_scalar.scalar_type(), ScalarType::Integer);
2052        assert_eq!(octal_scalar.value(), "0o755");
2053
2054        let hex_scalar = ScalarValue::parse("0xFF");
2055        assert_eq!(hex_scalar.scalar_type(), ScalarType::Integer);
2056        assert_eq!(hex_scalar.value(), "0xFF");
2057
2058        let legacy_octal_scalar = ScalarValue::parse("0755");
2059        assert_eq!(legacy_octal_scalar.scalar_type(), ScalarType::Integer);
2060        assert_eq!(legacy_octal_scalar.value(), "0755");
2061    }
2062}