Skip to main content

fast_yaml_core/
parser.rs

1use crate::error::ParseResult;
2use crate::value::Value;
3use saphyr::{ScalarOwned, YamlLoader};
4use saphyr_parser::{BufferedInput, Parser as SaphyrParser, ScalarStyle, Tag};
5
6/// Parser for YAML documents.
7///
8/// Wraps saphyr's YAML loading to provide a consistent API.
9#[derive(Debug)]
10pub struct Parser;
11
12impl Parser {
13    /// Parse a single YAML document from a string.
14    ///
15    /// Returns the first document if multiple are present, or None if the input is empty.
16    ///
17    /// # Errors
18    ///
19    /// Returns `ParseError::Scanner` if the YAML syntax is invalid.
20    ///
21    /// # Examples
22    ///
23    /// ```
24    /// use fast_yaml_core::Parser;
25    ///
26    /// let result = Parser::parse_str("name: test\nvalue: 123")?;
27    /// # Ok::<(), Box<dyn std::error::Error>>(())
28    /// ```
29    pub fn parse_str(input: &str) -> ParseResult<Option<Value>> {
30        let mut saphyr_parser = SaphyrParser::new(BufferedInput::new(input.chars()));
31        let mut loader = YamlLoader::<Value>::default();
32        loader.early_parse(false);
33        saphyr_parser.load(&mut loader, true)?;
34        let docs = inject_implicit_null_if_empty(loader.into_documents(), input);
35        Ok(docs.into_iter().next().map(canonicalize))
36    }
37
38    /// Parse all YAML documents from a string.
39    ///
40    /// Returns a vector of all documents found in the input.
41    ///
42    /// # Errors
43    ///
44    /// Returns `ParseError::Scanner` if the YAML syntax is invalid.
45    ///
46    /// # Examples
47    ///
48    /// ```
49    /// use fast_yaml_core::Parser;
50    ///
51    /// let docs = Parser::parse_all("---\nfoo: 1\n---\nbar: 2")?;
52    /// assert_eq!(docs.len(), 2);
53    /// # Ok::<(), Box<dyn std::error::Error>>(())
54    /// ```
55    pub fn parse_all(input: &str) -> ParseResult<Vec<Value>> {
56        let mut saphyr_parser = SaphyrParser::new(BufferedInput::new(input.chars()));
57        let mut loader = YamlLoader::<Value>::default();
58        loader.early_parse(false);
59        saphyr_parser.load(&mut loader, true)?;
60        let docs = inject_implicit_null_if_empty(loader.into_documents(), input);
61        Ok(docs.into_iter().map(canonicalize).collect())
62    }
63
64    /// Parse all YAML documents preserving scalar styles (literal `|`, folded `>`).
65    ///
66    /// Unlike [`parse_all`], this function uses `early_parse = false` in the loader,
67    /// which keeps scalars as `Value::Representation` nodes with their original style
68    /// information instead of resolving them eagerly.
69    ///
70    /// This is used by the format pipeline to preserve block scalar styles in output.
71    ///
72    /// # Errors
73    ///
74    /// Returns `ParseError::Scanner` if the YAML syntax is invalid.
75    ///
76    /// [`parse_all`]: Parser::parse_all
77    pub fn parse_all_preserving_styles(input: &str) -> ParseResult<Vec<Value>> {
78        let mut saphyr_parser = SaphyrParser::new(BufferedInput::new(input.chars()));
79        let mut loader = YamlLoader::<Value>::default();
80        loader.early_parse(false);
81        saphyr_parser.load(&mut loader, true)?;
82        Ok(inject_implicit_null_if_empty(
83            loader.into_documents(),
84            input,
85        ))
86    }
87}
88
89/// Returns `true` when `tag` is the YAML non-specific tag `!`.
90///
91/// The non-specific tag forces the failsafe schema: scalars resolve to plain strings
92/// regardless of their content (YAML 1.2 §6.8.1 / §10.3.2).
93fn is_non_specific_tag(tag: &Tag) -> bool {
94    tag.handle.is_empty() && tag.suffix == "!"
95}
96
97/// Injects one implicit null document when saphyr produces no documents for non-empty input.
98///
99/// Per YAML 1.2 §9.2, a stream with no explicit documents but non-empty content
100/// (comments, bare markers, whitespace) represents one document with an implicit null node.
101/// Empty string input stays `[]` to match `safe_load("")` → `None` behaviour.
102fn inject_implicit_null_if_empty(docs: Vec<Value>, input: &str) -> Vec<Value> {
103    if docs.is_empty() && !input.is_empty() {
104        vec![Value::Value(ScalarOwned::Null)]
105    } else {
106        docs
107    }
108}
109
110/// Canonicalize mixed-case YAML 1.2.2 bool/null variants that saphyr leaves as strings.
111///
112/// saphyr handles lowercase `true`, `false`, `null`, `~` natively.
113/// This function post-processes the tree to:
114/// - Resolve `Value::Representation` nodes (produced by `early_parse = false`) to typed scalars,
115///   applying explicit YAML core schema tags (`!!int`, `!!float`, `!!bool`, `!!null`, `!!str`)
116///   when present (#203).
117/// - Handle `True`, `TRUE`, `False`, `FALSE`, `Null` mixed-case variants.
118/// - Resolve YAML 1.1 merge keys (`<<: *anchor`) into parent mappings (#204).
119pub fn canonicalize(value: Value) -> Value {
120    match value {
121        Value::Representation(ref s, style, ref tag) => {
122            coerce_representation(s, style, tag.as_ref())
123        }
124        Value::Value(ScalarOwned::String(ref s)) => match s.as_str() {
125            "True" | "TRUE" => Value::Value(ScalarOwned::Boolean(true)),
126            "False" | "FALSE" => Value::Value(ScalarOwned::Boolean(false)),
127            "Null" | "NULL" => Value::Value(ScalarOwned::Null),
128            _ => value,
129        },
130        Value::Tagged(ref tag, ref inner) => coerce_tagged(tag, inner),
131        Value::Sequence(seq) => Value::Sequence(seq.into_iter().map(canonicalize).collect()),
132        Value::Mapping(map) => {
133            let canonicalized: crate::value::Map = map
134                .into_iter()
135                .map(|(k, v)| (canonicalize(k), canonicalize(v)))
136                .collect();
137            resolve_merge_keys(canonicalized)
138        }
139        other => other,
140    }
141}
142
/// Parse a YAML core schema integer: decimal, hex (`0x`), or octal (`0o`).
///
/// A single optional leading `+` or `-` is accepted for every radix, and the uppercase
/// prefixes `0X` / `0O` are accepted alongside the lowercase ones. After the sign and
/// prefix, the remainder must consist solely of digits valid for the radix, so inputs
/// with a second sign (`--5`, `+-5`, `0x-5`) are rejected.
///
/// Returns `None` for values outside the `i64` range or text that doesn't match integer
/// syntax. The full range is supported, including `i64::MIN`.
fn parse_core_schema_int(s: &str) -> Option<i64> {
    let (negative, unsigned) = match s.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, s.strip_prefix('+').unwrap_or(s)),
    };
    let (radix, digits) = if let Some(hex) = unsigned
        .strip_prefix("0x")
        .or_else(|| unsigned.strip_prefix("0X"))
    {
        (16, hex)
    } else if let Some(oct) = unsigned
        .strip_prefix("0o")
        .or_else(|| unsigned.strip_prefix("0O"))
    {
        (8, oct)
    } else {
        (10, unsigned)
    };
    // `from_str_radix` tolerates its own leading sign, so insist on pure digits here;
    // otherwise a second sign would slip through after ours was stripped.
    if digits.is_empty() || !digits.chars().all(|c| c.is_digit(radix)) {
        return None;
    }
    // Parse the magnitude in a wider type so that negating it can reach `i64::MIN`,
    // whose magnitude does not fit in `i64`.
    let magnitude = i128::from_str_radix(digits, radix).ok()?;
    let signed = if negative { -magnitude } else { magnitude };
    i64::try_from(signed).ok()
}
166
/// Reports whether `s` is spelled like an integer literal, regardless of magnitude.
///
/// The accepted shape is one optional `+`/`-` sign followed by either `0x`/`0X` and hex
/// digits, `0o`/`0O` and octal digits, or plain ASCII decimal digits. At least one digit
/// is required. No range check is made, so literals that exceed `i64` still match.
fn is_integer_literal(s: &str) -> bool {
    let unsigned = s
        .strip_prefix(|c: char| c == '+' || c == '-')
        .unwrap_or(s);
    let (radix, digits) = if let Some(hex) = unsigned
        .strip_prefix("0x")
        .or_else(|| unsigned.strip_prefix("0X"))
    {
        (16, hex)
    } else if let Some(oct) = unsigned
        .strip_prefix("0o")
        .or_else(|| unsigned.strip_prefix("0O"))
    {
        (8, oct)
    } else {
        (10, unsigned)
    };
    if digits.is_empty() {
        return false;
    }
    digits.bytes().all(|b| char::from(b).is_digit(radix))
}
181
182/// Attempt to coerce a float string to `i64` via truncation toward zero (`PyYAML` convention).
183///
184/// Returns `None` for non-finite values (.nan, .inf) and values outside the `i64` range.
185/// Values very close to `i64::MAX` may saturate due to `f64` precision limits — this is a
186/// known, benign edge case at the representable boundary.
187#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
188fn float_str_to_int(s: &str) -> Option<i64> {
189    parse_core_schema_float(s)
190        .filter(|f| f.is_finite() && *f >= i64::MIN as f64 && *f <= i64::MAX as f64)
191        .map(|f| f as i64)
192}
193
/// Parse a YAML core schema float, handling special values (.inf, .nan, etc.).
///
/// Accepted forms:
/// - `.inf`, `.Inf`, `.INF`, each optionally prefixed with `+` or `-`;
/// - `.nan`, `.NaN`, `.NAN`;
/// - an optional sign followed by digits with optional fraction and exponent (`1`, `1.5`,
///   `1.`, `1e3`), or by a leading-dot fraction (`.5`, `-.5e2`).
///
/// Returns `None` for anything else, including the bare words `inf`, `infinity` and `nan`
/// that Rust's own `f64` parser would accept.
fn parse_core_schema_float(s: &str) -> Option<f64> {
    match s {
        ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => Some(f64::INFINITY),
        "-.inf" | "-.Inf" | "-.INF" => Some(f64::NEG_INFINITY),
        ".nan" | ".NaN" | ".NAN" => Some(f64::NAN),
        other => {
            let unsigned = other.strip_prefix(['+', '-']).unwrap_or(other);
            // The number must open with a digit, or with `.` immediately followed by a
            // digit; this is what rules out bare words and a lone `.`.
            let mut leading = unsigned.chars();
            let starts_numeric = match leading.next() {
                Some(c) if c.is_ascii_digit() => true,
                Some('.') => leading.next().is_some_and(|c| c.is_ascii_digit()),
                _ => false,
            };
            // Restrict the alphabet before delegating, so the final shape check can be
            // left to `f64`'s parser without admitting its extra keywords.
            let only_float_chars = unsigned
                .bytes()
                .all(|b| b.is_ascii_digit() || matches!(b, b'.' | b'e' | b'E' | b'+' | b'-'));
            if starts_numeric && only_float_chars {
                other.parse::<f64>().ok()
            } else {
                None
            }
        }
    }
}
215
216/// Coerce a `Value::Representation` scalar, applying the tag if present.
217///
218/// When `early_parse = false`, saphyr preserves the raw string, style, and tag in a
219/// `Representation` node. This function resolves that node to a typed `Value::Value`.
220fn coerce_representation(s: &str, style: ScalarStyle, tag: Option<&Tag>) -> Value {
221    // 1. Core-schema explicit tag (!!str, !!int, !!float, !!bool, !!null).
222    if let Some(tag) = tag.filter(|t| t.is_yaml_core_schema()) {
223        let coerced: Option<ScalarOwned> = match tag.suffix.as_str() {
224            "int" => parse_core_schema_int(s)
225                .or_else(|| float_str_to_int(s))
226                .map(ScalarOwned::Integer),
227            "float" => parse_core_schema_float(s).map(|f| ScalarOwned::FloatingPoint(f.into())),
228            "bool" => s.parse::<bool>().ok().map(ScalarOwned::Boolean),
229            "null" => matches!(s, "~" | "null" | "").then_some(ScalarOwned::Null),
230            "str" => Some(ScalarOwned::String(s.into())),
231            _ => None,
232        };
233        if let Some(scalar) = coerced {
234            return Value::Value(scalar);
235        }
236    }
237    // 2. Non-specific tag `!`: failsafe schema forces string (YAML 1.2 §6.8.1 / §10.3.2).
238    if tag.is_some_and(is_non_specific_tag) {
239        return Value::Value(ScalarOwned::String(s.into()));
240    }
241    // 3. No tag or unknown tag: non-plain scalars are always strings.
242    if style != ScalarStyle::Plain {
243        return Value::Value(ScalarOwned::String(s.into()));
244    }
245    // 4. Empty plain scalar with no tag: implicit null (YAML 1.2 §10.3.2, bare `---`).
246    if s.is_empty() {
247        return Value::Value(ScalarOwned::Null);
248    }
249    // 5. Plain scalar: apply saphyr's implicit resolution rules.
250    let scalar = match s {
251        "~" | "null" | "NULL" | "Null" => ScalarOwned::Null,
252        "true" | "True" | "TRUE" => ScalarOwned::Boolean(true),
253        "false" | "False" | "FALSE" => ScalarOwned::Boolean(false),
254        other => parse_core_schema_int(other).map_or_else(
255            || {
256                if is_integer_literal(other) {
257                    ScalarOwned::String(other.into())
258                } else {
259                    parse_core_schema_float(other).map_or_else(
260                        || ScalarOwned::String(other.into()),
261                        |f| ScalarOwned::FloatingPoint(f.into()),
262                    )
263                }
264            },
265            ScalarOwned::Integer,
266        ),
267    };
268    Value::Value(scalar)
269}
270
271/// Coerce a tagged value to the appropriate scalar type based on the YAML core schema tag suffix.
272fn coerce_tagged(tag: &Tag, inner: &Value) -> Value {
273    if tag.is_yaml_core_schema()
274        && let Value::Value(ScalarOwned::String(ref s)) = *inner
275    {
276        let coerced: Option<ScalarOwned> = match tag.suffix.as_str() {
277            "int" => parse_core_schema_int(s)
278                .or_else(|| float_str_to_int(s))
279                .map(ScalarOwned::Integer),
280            "float" => parse_core_schema_float(s).map(|f| ScalarOwned::FloatingPoint(f.into())),
281            "bool" => s.parse::<bool>().ok().map(ScalarOwned::Boolean),
282            "null" => matches!(s.as_str(), "~" | "null" | "").then_some(ScalarOwned::Null),
283            "str" => Some(ScalarOwned::String(s.clone())),
284            _ => None,
285        };
286        if let Some(scalar) = coerced {
287            return Value::Value(scalar);
288        }
289    }
290    canonicalize(inner.clone())
291}
292
293/// Resolve YAML 1.1 merge keys (`<<`) in a canonicalized mapping.
294///
295/// Explicit keys always win over merged keys.
296fn resolve_merge_keys(map: crate::value::Map) -> Value {
297    let merge_key = Value::Value(ScalarOwned::String("<<".into()));
298    if !map.contains_key(&merge_key) {
299        return Value::Mapping(map);
300    }
301
302    let mut result: crate::value::Map = crate::value::Map::new();
303    let mut merges: Vec<Value> = Vec::new();
304
305    for (k, v) in map {
306        if k == merge_key {
307            merges.push(v);
308        } else {
309            result.insert(k, v);
310        }
311    }
312
313    for merge_val in merges {
314        match merge_val {
315            Value::Mapping(merge_map) => {
316                for (mk, mv) in merge_map {
317                    result.entry(mk).or_insert(mv);
318                }
319            }
320            Value::Sequence(seq) => {
321                for item in seq {
322                    if let Value::Mapping(merge_map) = item {
323                        for (mk, mv) in merge_map {
324                            result.entry(mk).or_insert(mv);
325                        }
326                    }
327                }
328            }
329            _ => {}
330        }
331    }
332
333    Value::Mapping(result)
334}
335
#[cfg(test)]
mod tests {
    use super::*;

    // --- shared helpers ---

    /// Builds the string scalar used to look up a mapping key.
    fn str_key(name: &str) -> Value {
        Value::Value(ScalarOwned::String(name.into()))
    }

    /// Unwraps a mapping node, panicking on any other node kind.
    fn expect_mapping(value: Value) -> crate::value::Map {
        let Value::Mapping(map) = value else {
            panic!("expected mapping");
        };
        map
    }

    /// Parses `yaml` as a single document and returns its root mapping.
    fn parse_root_mapping(yaml: &str) -> crate::value::Map {
        expect_mapping(Parser::parse_str(yaml).unwrap().unwrap())
    }

    /// Parses `yaml` and returns the value stored under `key` in the root mapping.
    fn get_mapping_val(yaml: &str, key: &str) -> Value {
        parse_root_mapping(yaml)[&str_key(key)].clone()
    }

    /// Asserts that `key` resolves to exactly `Integer(expected)`.
    fn assert_integer(yaml: &str, key: &str, expected: i64) {
        let v = get_mapping_val(yaml, key);
        assert!(
            matches!(&v, Value::Value(ScalarOwned::Integer(n)) if *n == expected),
            "`{yaml}` should be Integer({expected}), got {v:?}"
        );
    }

    /// Asserts that `key` does not resolve to any integer.
    fn assert_not_integer(yaml: &str, key: &str) {
        let v = get_mapping_val(yaml, key);
        assert!(
            !matches!(&v, Value::Value(ScalarOwned::Integer(_))),
            "`{yaml}` should not produce an integer, got {v:?}"
        );
    }

    /// Asserts that `key` resolves to exactly `Boolean(expected)`.
    fn assert_boolean(yaml: &str, key: &str, expected: bool) {
        let v = get_mapping_val(yaml, key);
        assert!(
            matches!(&v, Value::Value(ScalarOwned::Boolean(b)) if *b == expected),
            "`{yaml}` should be Bool({expected}), got {v:?}"
        );
    }

    /// Asserts that `key` resolves to null.
    fn assert_null(yaml: &str, key: &str) {
        let v = get_mapping_val(yaml, key);
        assert!(
            matches!(&v, Value::Value(ScalarOwned::Null)),
            "`{yaml}` should be Null, got {v:?}"
        );
    }

    /// Asserts that `key` resolves to a string, whatever its content.
    fn assert_string(yaml: &str, key: &str) {
        let v = get_mapping_val(yaml, key);
        assert!(
            matches!(&v, Value::Value(ScalarOwned::String(_))),
            "`{yaml}` should be String, got {v:?}"
        );
    }

    /// Asserts that `key` resolves to a string equal to `expected`.
    fn assert_string_eq(yaml: &str, key: &str, expected: &str) {
        let v = get_mapping_val(yaml, key);
        assert!(
            matches!(&v, Value::Value(ScalarOwned::String(s)) if s.as_str() == expected),
            "`{yaml}` should be String({expected:?}), got {v:?}"
        );
    }

    /// Asserts that `input` parses to exactly one document, and that it is null.
    fn assert_single_null_doc(input: &str) {
        let docs = Parser::parse_all(input).unwrap();
        assert_eq!(docs.len(), 1);
        assert!(matches!(docs[0], Value::Value(ScalarOwned::Null)));
    }

    // --- basic parsing ---

    #[test]
    fn test_parse_str_simple() {
        let result = Parser::parse_str("name: test\nvalue: 123").unwrap();
        assert!(result.is_some());
    }

    #[test]
    fn test_parse_str_empty() {
        let result = Parser::parse_str("").unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn test_parse_all_multiple_docs() {
        let docs = Parser::parse_all("---\nfoo: 1\n---\nbar: 2").unwrap();
        assert_eq!(docs.len(), 2);
    }

    #[test]
    fn test_yaml12_bool_true_variants() {
        for variant in ["True", "TRUE"] {
            assert_boolean(&format!("val: {variant}"), "val", true);
        }
    }

    #[test]
    fn test_yaml12_bool_false_variants() {
        for variant in ["False", "FALSE"] {
            assert_boolean(&format!("val: {variant}"), "val", false);
        }
    }

    #[test]
    fn test_yaml12_null_variant() {
        assert_null("val: Null", "val");
    }

    #[test]
    fn test_parse_str_invalid() {
        let result = Parser::parse_str("invalid: [\n  missing: bracket");
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_nested() {
        let yaml = r"
person:
  name: John
  age: 30
  hobbies:
    - reading
    - coding
";
        let result = Parser::parse_str(yaml).unwrap();
        assert!(result.is_some());
    }

    #[test]
    fn test_parse_anchors() {
        let yaml = r"
defaults: &defaults
  adapter: postgres
  host: localhost

development:
  <<: *defaults
  database: dev_db
";
        let result = Parser::parse_str(yaml).unwrap();
        assert!(result.is_some());
    }

    // --- explicit core schema tags ---

    #[test]
    fn test_explicit_tag_int_quoted() {
        assert_integer("val: !!int '42'", "val", 42);
    }

    #[test]
    fn test_explicit_tag_float() {
        let v = get_mapping_val("val: !!float '3.14'", "val");
        if let Value::Value(ScalarOwned::FloatingPoint(f)) = v {
            #[allow(clippy::approx_constant)]
            let expected = 3.14_f64;
            assert!((f64::from(f) - expected).abs() < 1e-9);
        } else {
            panic!("expected FloatingPoint, got {v:?}");
        }
    }

    #[test]
    fn test_explicit_tag_bool() {
        assert_boolean("val: !!bool 'true'", "val", true);
    }

    #[test]
    fn test_explicit_tag_null() {
        assert_null("val: !!null ''", "val");
    }

    #[test]
    fn test_explicit_tag_str_int() {
        assert_string_eq("val: !!str 42", "val", "42");
    }

    #[test]
    fn test_explicit_tag_int_float_truncation() {
        assert_integer("val: !!int 3.14", "val", 3);
    }

    #[test]
    fn test_explicit_tag_int_negative_float() {
        assert_integer("val: !!int -2.7", "val", -2);
    }

    #[test]
    fn test_explicit_tag_int_scientific() {
        assert_integer("val: !!int 1.0e2", "val", 100);
    }

    #[test]
    fn test_explicit_tag_int_exact_float() {
        assert_integer("val: !!int 3.0", "val", 3);
    }

    #[test]
    fn test_explicit_tag_int_nan_rejected() {
        assert_not_integer("val: !!int .nan", "val");
    }

    #[test]
    fn test_explicit_tag_int_inf_rejected() {
        assert_not_integer("val: !!int .inf", "val");
    }

    #[test]
    fn test_explicit_tag_int_overflow_rejected() {
        // Must not produce a saturated integer.
        assert_not_integer("val: !!int 1.0e20", "val");
    }

    // --- merge keys ---

    #[test]
    fn test_merge_key_basic() {
        let yaml = r"
defaults: &defaults
  adapter: postgres
  host: localhost
development:
  <<: *defaults
  database: dev_db
";
        let root = parse_root_mapping(yaml);
        let dev = expect_mapping(root[&str_key("development")].clone());

        assert!(dev.contains_key(&str_key("adapter")), "adapter should be merged");
        assert!(dev.contains_key(&str_key("host")), "host should be merged");
        assert!(dev.contains_key(&str_key("database")), "database should be present");
        assert!(!dev.contains_key(&str_key("<<")), "<< should be removed");
    }

    #[test]
    fn test_merge_key_explicit_wins() {
        let yaml = r"
base: &base
  host: localhost
  port: 5432
override:
  <<: *base
  host: remotehost
";
        let root = parse_root_mapping(yaml);
        let ov = expect_mapping(root[&str_key("override")].clone());
        assert!(
            matches!(
                &ov[&str_key("host")],
                Value::Value(ScalarOwned::String(s)) if s == "remotehost"
            ),
            "explicit host should win over merged"
        );
    }

    #[test]
    fn test_merge_key_sequence() {
        let yaml = r"
a: &a
  x: 1
b: &b
  y: 2
merged:
  <<: [*a, *b]
  z: 3
";
        let root = parse_root_mapping(yaml);
        let m = expect_mapping(root[&str_key("merged")].clone());

        assert!(m.contains_key(&str_key("x")), "x should be merged from *a");
        assert!(m.contains_key(&str_key("y")), "y should be merged from *b");
        assert!(m.contains_key(&str_key("z")), "z should be present");
    }

    // --- integer range handling ---

    #[test]
    fn test_i64_max_boundary() {
        // i64::MAX itself stays an integer; one past it is kept as a string.
        assert_integer("x: 9223372036854775807", "x", i64::MAX);
        assert_string("x: 9223372036854775808", "x");
    }

    #[test]
    fn test_leading_plus_large_integer() {
        assert_integer("x: +42", "x", 42);
        assert_string("x: +99999999999999999999", "x");
    }

    #[test]
    fn test_large_integer_preserved_as_string() {
        let big =
            "99999999999999999999999999999999999999999999999999999999999999999999999999999999";
        assert_string_eq(&format!("x: {big}"), "x", big);
    }

    #[test]
    fn test_normal_integer_unaffected() {
        assert_integer("x: 42", "x", 42);
    }

    #[test]
    fn test_float_unaffected() {
        let v = get_mapping_val("x: 1.5e10", "x");
        assert!(
            matches!(v, Value::Value(ScalarOwned::FloatingPoint(_))),
            "got {v:?}"
        );
    }

    #[test]
    fn test_negative_large_integer() {
        let big = "-99999999999999999999999999999999";
        assert_string_eq(&format!("x: {big}"), "x", big);
    }

    #[test]
    fn test_hex_overflow_preserved_as_string() {
        assert_string("x: 0x8000000000000000", "x");
    }

    #[test]
    fn test_hex_max_i64_preserved_as_integer() {
        // 0x7FFFFFFFFFFFFFFF == i64::MAX == 9223372036854775807
        assert_integer("x: 0x7FFFFFFFFFFFFFFF", "x", 9_223_372_036_854_775_807);
    }

    #[test]
    fn test_octal_overflow_preserved_as_string() {
        assert_string("x: 0o1000000000000000000000", "x");
    }

    #[test]
    fn test_octal_max_fitting_preserved_as_integer() {
        // 0o777777777777777777777 == i64::MAX == 9223372036854775807
        assert_integer("x: 0o777777777777777777777", "x", 9_223_372_036_854_775_807);
    }

    #[test]
    fn test_tagged_int_hex_fits_i64() {
        assert_integer("x: !!int 0xFF", "x", 255);
    }

    #[test]
    fn test_tagged_int_hex_overflow_preserved_as_string() {
        assert_string("x: !!int 0x8000000000000000", "x");
    }

    #[test]
    fn test_negative_hex_overflow_preserved_as_string() {
        // The magnitude 0x8000000000000001 is one more than |i64::MIN|, so the negated
        // value cannot be represented as an i64 and the scalar is kept as a string.
        assert_string("x: -0x8000000000000001", "x");
    }

    #[test]
    fn test_tagged_int_octal_overflow_preserved_as_string() {
        assert_string("x: !!int 0o1000000000000000000000", "x");
    }

    #[test]
    fn test_uppercase_prefix_hex_overflow_preserved_as_string() {
        assert_string("x: 0XDEADBEEFDEADBEEF", "x");
    }

    #[test]
    fn test_uppercase_prefix_octal_overflow_preserved_as_string() {
        assert_string("x: 0O1000000000000000000000", "x");
    }

    // --- #235: empty/comment-only/bare-marker streams yield one null doc ---

    #[test]
    fn test_empty_string_yields_empty_vec() {
        let docs = Parser::parse_all("").unwrap();
        assert!(docs.is_empty(), "empty string must stay []");
    }

    #[test]
    fn test_whitespace_only_yields_null_doc() {
        assert_single_null_doc("   ");
    }

    #[test]
    fn test_comment_only_yields_null_doc() {
        assert_single_null_doc("# comment");
    }

    #[test]
    fn test_bare_doc_end_yields_null_doc() {
        assert_single_null_doc("...");
    }

    #[test]
    fn test_comment_then_doc_end_yields_null_doc() {
        assert_single_null_doc("# c\n...");
    }

    #[test]
    fn test_bare_doc_start_yields_null_doc() {
        assert_single_null_doc("---");
    }

    #[test]
    fn test_parse_str_comment_only_returns_null() {
        let result = Parser::parse_str("# comment").unwrap();
        assert!(matches!(result, Some(Value::Value(ScalarOwned::Null))));
    }

    #[test]
    fn test_parse_str_empty_unchanged() {
        let result = Parser::parse_str("").unwrap();
        assert!(result.is_none(), "empty string must still return None");
    }

    #[test]
    fn test_bom_only_yields_one_doc() {
        // BOM-only: saphyr processes the BOM and returns one document (empty Null scalar).
        // inject_implicit_null_if_empty is not needed here — saphyr handles it.
        // Document: BOM-only → 1 doc (saphyr behaviour, not injected).
        let docs = Parser::parse_all("\u{FEFF}").unwrap();
        assert_eq!(docs.len(), 1, "BOM-only should yield exactly one document");
    }

    // --- #238: non-specific tag `!` forces string (failsafe schema) ---

    #[test]
    fn test_non_specific_tag_plain_integer_is_string() {
        assert_string_eq("x: ! 99", "x", "99");
    }

    #[test]
    fn test_non_specific_tag_quoted_is_string() {
        assert_string_eq("x: ! \"99\"", "x", "99");
    }

    #[test]
    fn test_non_specific_tag_true_is_string() {
        assert_string_eq("x: ! true", "x", "true");
    }

    #[test]
    fn test_non_specific_tag_null_keyword_is_string() {
        assert_string_eq("x: ! null", "x", "null");
    }

    #[test]
    fn test_non_specific_tag_empty_is_string_not_null() {
        // `! ''` must be String(""), NOT Null. Order of branches is load-bearing.
        assert_string_eq("x: ! ''", "x", "");
    }

    #[test]
    fn test_non_specific_tag_on_sequence_is_noop() {
        // Non-specific tag on collection: failsafe seq = plain seq.
        let val = get_mapping_val("x: ! [1, 2]", "x");
        assert!(
            matches!(val, Value::Sequence(_)),
            "! on sequence must stay Sequence, got {val:?}"
        );
    }

    // --- #235 round-trip: format(parse("# c")) freezes new expected output ---

    #[test]
    fn test_round_trip_comment_only() {
        use crate::emitter::Emitter;
        let docs = Parser::parse_all_preserving_styles("# comment").unwrap();
        assert_eq!(docs.len(), 1, "should have one null doc");
        // The null doc formats to "null\n" or "~\n" — freeze whatever the emitter produces.
        let formatted = Emitter::emit_all(&docs).unwrap();
        assert!(
            !formatted.is_empty(),
            "formatted output must be non-empty, got: {formatted:?}"
        );
        // Null should not format as empty string.
        assert_ne!(formatted.trim(), "", "null doc must not format to empty");
    }
}