Skip to main content

rlsp_yaml_parser/
schema.rs

1// SPDX-License-Identifier: MIT
2
3//! YAML 1.2.2 §10 schema tag resolution.
4//!
5//! Three schemas are provided, in increasing generality:
6//!
7//! - [`Schema::Failsafe`] — all scalars resolve to `!!str`, all sequences to
8//!   `!!seq`, all mappings to `!!map`.
9//! - [`Schema::Json`] — narrow pattern set; unmatched plain scalars are an
10//!   error ([`UnresolvedScalar`]).
11//! - [`Schema::Core`] — superset of JSON; unmatched plain scalars fall back to
12//!   `!!str`.
13//!
//! Use [`resolve_scalar`] and [`resolve_collection`] to apply a schema to a
//! node.  When the node already carries an explicit source tag, no schema
//! resolution is applied: [`resolve_scalar`] returns `Ok(None)` and
//! [`resolve_collection`] returns `None`, so the caller's tag takes precedence.
17
18use crate::event::ScalarStyle;
19
20// ---------------------------------------------------------------------------
21// Public types
22// ---------------------------------------------------------------------------
23
/// YAML 1.2.2 §10 recommended schema selection.
///
/// Passed to [`resolve_scalar`] and [`resolve_collection`] to choose how
/// untagged nodes are resolved.  The choice only affects scalars: collections
/// resolve to `seq` / `map` under every schema.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Schema {
    /// Failsafe schema (§10.1): scalars → `str`, sequences → `seq`,
    /// mappings → `map`.  No pattern matching is performed on scalar content,
    /// whatever the scalar style.
    Failsafe,
    /// JSON schema (§10.2): narrow pattern set; unmatched plain scalars
    /// produce [`UnresolvedScalar`].  Non-plain scalars resolve to `str`.
    Json,
    /// Core schema (§10.3): superset of JSON; unmatched plain scalars fall
    /// back to `str`.  Non-plain scalars resolve to `str`.
    Core,
}
37
/// The resolved YAML tag for a node.
///
/// The variants are field-less; each one stands for a single
/// `tag:yaml.org,2002:*` URI, which [`ResolvedTag::as_str`] returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ResolvedTag {
    /// `tag:yaml.org,2002:str`
    Str,
    /// `tag:yaml.org,2002:int`
    Int,
    /// `tag:yaml.org,2002:float`
    Float,
    /// `tag:yaml.org,2002:bool`
    Bool,
    /// `tag:yaml.org,2002:null`
    Null,
    /// `tag:yaml.org,2002:seq`
    Seq,
    /// `tag:yaml.org,2002:map`
    Map,
}
58
impl ResolvedTag {
    /// Returns the `tag:yaml.org,2002:*` URI for this tag.
    ///
    /// The result is a `'static` string literal, so the call never allocates
    /// and is usable in `const` contexts.
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        // Exhaustive on purpose: a new variant must be given a URI here
        // before the crate compiles again.
        match self {
            Self::Str => "tag:yaml.org,2002:str",
            Self::Int => "tag:yaml.org,2002:int",
            Self::Float => "tag:yaml.org,2002:float",
            Self::Bool => "tag:yaml.org,2002:bool",
            Self::Null => "tag:yaml.org,2002:null",
            Self::Seq => "tag:yaml.org,2002:seq",
            Self::Map => "tag:yaml.org,2002:map",
        }
    }
}
74
/// Error returned by [`resolve_scalar`] when the JSON schema cannot match a
/// plain scalar value.
///
/// The JSON schema has no fallback — every untagged plain scalar must match one
/// of its patterns (null, bool, int, float).  If none match, the scalar is
/// unresolvable under JSON schema rules.
///
/// This is a unit struct: it does not record the offending value, so callers
/// that want to report it must keep the scalar text themselves.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
#[error("unresolved scalar: no JSON schema pattern matched the plain scalar value")]
pub struct UnresolvedScalar;
84
/// Collection kind, used as a parameter to [`resolve_collection`].
///
/// Selects which collection tag an untagged node resolves to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CollectionKind {
    /// A YAML sequence (`!!seq`).
    Sequence,
    /// A YAML mapping (`!!map`).
    Mapping,
}
93
94// ---------------------------------------------------------------------------
95// Resolution functions
96// ---------------------------------------------------------------------------
97
98/// Resolve the tag for a scalar node under the given schema.
99///
100/// # Return value
101///
102/// - `Ok(None)` — `source_tag` is `Some`; the existing explicit tag wins, no
103///   schema resolution applied.
104/// - `Ok(Some(tag))` — resolution succeeded; `tag` is the resolved YAML tag.
105///
106/// # Errors
107///
108/// Returns [`Err(UnresolvedScalar)`](UnresolvedScalar) only with
109/// [`Schema::Json`] when the scalar style is [`ScalarStyle::Plain`] and no
110/// JSON pattern matched.
111///
112/// # Style semantics
113///
114/// Only [`ScalarStyle::Plain`] scalars participate in pattern matching.  All
115/// other styles (single-quoted, double-quoted, literal block, folded block)
116/// resolve unconditionally to `!!str` — the content of a quoted or block scalar
117/// is always a string regardless of what the characters spell.
118#[inline]
119pub fn resolve_scalar(
120    schema: Schema,
121    style: ScalarStyle,
122    value: &str,
123    source_tag: Option<&str>,
124) -> Result<Option<ResolvedTag>, UnresolvedScalar> {
125    // Explicit source tag takes priority over schema resolution.
126    if source_tag.is_some() {
127        return Ok(None);
128    }
129
130    match schema {
131        Schema::Failsafe => Ok(Some(ResolvedTag::Str)),
132
133        Schema::Core => {
134            let tag = match style {
135                ScalarStyle::Plain => resolve_core_plain(value),
136                // All non-plain styles are unconditionally !!str.
137                ScalarStyle::SingleQuoted
138                | ScalarStyle::DoubleQuoted
139                | ScalarStyle::Literal(_)
140                | ScalarStyle::Folded(_) => ResolvedTag::Str,
141            };
142            Ok(Some(tag))
143        }
144
145        Schema::Json => {
146            let tag = match style {
147                ScalarStyle::Plain => resolve_json_plain(value)?,
148                // Non-plain styles are !!str in JSON schema too.
149                ScalarStyle::SingleQuoted
150                | ScalarStyle::DoubleQuoted
151                | ScalarStyle::Literal(_)
152                | ScalarStyle::Folded(_) => ResolvedTag::Str,
153            };
154            Ok(Some(tag))
155        }
156    }
157}
158
159/// Resolve the tag for a collection node under the given schema.
160///
161/// # Return value
162///
163/// - `None` — `source_tag` is `Some`; the existing explicit tag wins.
164/// - `Some(tag)` — resolved tag (`Seq` or `Map`) according to `kind`.
165///
166/// All three schemas resolve sequences to `!!seq` and mappings to `!!map`.
167#[must_use]
168pub const fn resolve_collection(
169    schema: Schema,
170    kind: CollectionKind,
171    source_tag: Option<&str>,
172) -> Option<ResolvedTag> {
173    // Explicit source tag wins.
174    if source_tag.is_some() {
175        return None;
176    }
177    // All three schemas map sequences → !!seq and mappings → !!map.
178    let _ = schema;
179    Some(match kind {
180        CollectionKind::Sequence => ResolvedTag::Seq,
181        CollectionKind::Mapping => ResolvedTag::Map,
182    })
183}
184
185// ---------------------------------------------------------------------------
186// Core schema plain-scalar dispatch (§10.3)
187// ---------------------------------------------------------------------------
188
189/// Resolve a plain scalar under the Core schema.
190///
191/// Dispatches on the first byte to prune the common-case `Str` outcome before
192/// any pattern matcher runs. Each branch covers exactly the prefix set of the
193/// matcher(s) it invokes — bytes outside the enumerated set can only be `Str`.
194#[inline]
195fn resolve_core_plain(value: &str) -> ResolvedTag {
196    match value.as_bytes().first().copied() {
197        // Empty string or "~" → null (the only two direct-return null forms).
198        None | Some(b'~') => ResolvedTag::Null,
199        // "null" | "Null" | "NULL" start with 'n'/'N'; only null uses these.
200        Some(b'n' | b'N') => {
201            if is_core_null(value) {
202                ResolvedTag::Null
203            } else {
204                ResolvedTag::Str
205            }
206        }
207        // "true"/"True"/"TRUE"/"false"/"False"/"FALSE".
208        Some(b't' | b'T' | b'f' | b'F') => {
209            if is_core_bool(value) {
210                ResolvedTag::Bool
211            } else {
212                ResolvedTag::Str
213            }
214        }
215        // Decimal/octal/hex integers and decimal floats with a leading digit or sign.
216        Some(b'-' | b'+' | b'0'..=b'9') => {
217            if is_core_int(value) {
218                ResolvedTag::Int
219            } else if is_core_float(value) {
220                ResolvedTag::Float
221            } else {
222                ResolvedTag::Str
223            }
224        }
225        // ".inf"/".Inf"/".INF"/".nan"/".NaN"/".NAN" and leading-dot decimal floats.
226        Some(b'.') => {
227            if is_core_float(value) {
228                ResolvedTag::Float
229            } else {
230                ResolvedTag::Str
231            }
232        }
233        // Any other first byte cannot match null/bool/int/float — return Str directly.
234        Some(_) => ResolvedTag::Str,
235    }
236}
237
238// ---------------------------------------------------------------------------
239// JSON schema plain-scalar dispatch (§10.2)
240// ---------------------------------------------------------------------------
241
242/// Resolve a plain scalar under the JSON schema.
243///
244/// Dispatch order: null → bool → int → float.  No fallback — unmatched
245/// scalars return `Err(UnresolvedScalar)`.
246///
247/// Note on `-0`: JSON int is `0 | -?[1-9][0-9]*`, so `-0` is not a JSON int
248/// (the single-`0` branch is bare, with no sign).  JSON float is
249/// `-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][-+]?[0-9]+)?`, so `-0` matches
250/// (sign `-`, integer part `0`, no fractional or exponent).  Therefore `-0`
251/// resolves to `Float` under the JSON schema.
252fn resolve_json_plain(value: &str) -> Result<ResolvedTag, UnresolvedScalar> {
253    if is_json_null(value) {
254        Ok(ResolvedTag::Null)
255    } else if is_json_bool(value) {
256        Ok(ResolvedTag::Bool)
257    } else if is_json_int(value) {
258        Ok(ResolvedTag::Int)
259    } else if is_json_float(value) {
260        Ok(ResolvedTag::Float)
261    } else {
262        Err(UnresolvedScalar)
263    }
264}
265
266// ---------------------------------------------------------------------------
267// Core schema matchers (§10.3.2 tag resolution table)
268// ---------------------------------------------------------------------------
269
/// Core null forms: `null | Null | NULL | ~ | ""` (YAML 1.2.2 §10.3.2 null row).
///
/// The comparison is exact and case-sensitive; other casings such as `nUll`
/// do not match.
#[must_use]
pub fn is_core_null(value: &str) -> bool {
    const NULL_FORMS: [&str; 5] = ["null", "Null", "NULL", "~", ""];
    NULL_FORMS.contains(&value)
}
275
/// Core bool forms: `true | True | TRUE | false | False | FALSE`
/// (§10.3.2 bool row).
///
/// Only these six exact spellings match.
#[must_use]
pub fn is_core_bool(value: &str) -> bool {
    const BOOL_FORMS: [&str; 6] = ["true", "True", "TRUE", "false", "False", "FALSE"];
    BOOL_FORMS.contains(&value)
}
284
/// Core int: decimal `[-+]?[0-9]+`, octal `0o[0-7]+`, hex `0x[0-9a-fA-F]+`
/// (§10.3.2 int rows), with leading zeros in decimal rejected.
///
/// **Unsigned radix rows:** in the §10.3.2 table only the decimal row carries
/// the `[-+]?` prefix.  The octal and hex rows are therefore matched against
/// the raw input; a signed form such as `-0o10` or `+0xFF` matches no int row
/// and is left to resolve to `!!str`.
///
/// **Leading-zero rejection:** a decimal body of more than one digit that
/// starts with `0` (e.g. `007`) is rejected; a lone `0` (optionally signed) is
/// the only decimal form beginning with zero.  This is stricter than the
/// YAML 1.2.2 spec permits but is intentional: `007` is ambiguous with YAML 1.1
/// octal literals, and rejection enables the LSP to surface a targeted
/// diagnostic and quick-fix.
#[must_use]
pub fn is_core_int(value: &str) -> bool {
    // Radix-prefixed rows: at least one digit of the right kind after the
    // prefix, and nothing else.
    if let Some(octal) = value.strip_prefix("0o") {
        return !octal.is_empty() && octal.bytes().all(|b| matches!(b, b'0'..=b'7'));
    }
    if let Some(hex) = value.strip_prefix("0x") {
        return !hex.is_empty() && hex.bytes().all(|b| b.is_ascii_hexdigit());
    }

    // Decimal row: drop at most one leading sign, then inspect the digits.
    let digits = value.strip_prefix(['-', '+']).unwrap_or(value);
    match digits.as_bytes() {
        // Nothing (or only a sign) is not a number.
        [] => false,
        // A lone zero is fine.
        [b'0'] => true,
        // Any longer body starting with `0` is rejected: leading-zero decimals
        // and signed radix forms (`-0o10`, `+0xFF`) both end up here.
        [b'0', ..] => false,
        body => body.iter().all(u8::is_ascii_digit),
    }
}
335
336/// Core float: decimal (`[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?`),
337/// infinity (`[-+]?\.inf|\.Inf|\.INF`), not-a-number (`.nan|.NaN|.NAN`)
338/// (§10.3.2 float rows).
339#[must_use]
340pub fn is_core_float(value: &str) -> bool {
341    // Special values.
342    if matches!(value, ".nan" | ".NaN" | ".NAN") {
343        return true;
344    }
345
346    // Strip optional leading sign for inf and decimal.
347    let unsigned = value
348        .strip_prefix('-')
349        .or_else(|| value.strip_prefix('+'))
350        .unwrap_or(value);
351
352    // Infinity: [+-]?.inf | .Inf | .INF
353    if matches!(unsigned, ".inf" | ".Inf" | ".INF") {
354        return true;
355    }
356
357    // Decimal float: (\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?
358    is_core_decimal_float(unsigned)
359}
360
361/// Check whether `s` (already sign-stripped) matches the Core decimal float
362/// pattern: `(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?`.
363fn is_core_decimal_float(s: &str) -> bool {
364    // Split off optional exponent first.
365    let (mantissa, exp_part) = split_exponent(s);
366
367    // Validate exponent if present.
368    if exp_part.is_some_and(|exp| !is_valid_exponent_digits(exp)) {
369        return false;
370    }
371
372    // Mantissa must be either:
373    //   a) \.[0-9]+  — leading-dot form
374    //   b) [0-9]+(\.[0-9]*)?  — digit(s) with optional fractional part
375    if let Some(after_dot) = mantissa.strip_prefix('.') {
376        // Leading-dot form: must have at least one digit after the dot.
377        !after_dot.is_empty() && after_dot.bytes().all(|b| b.is_ascii_digit())
378    } else {
379        // Digit-first form.
380        let (int_part, frac) = mantissa.find('.').map_or((mantissa, None), |pos| {
381            (&mantissa[..pos], Some(&mantissa[pos + 1..]))
382        });
383        if int_part.is_empty() || !int_part.bytes().all(|b| b.is_ascii_digit()) {
384            return false;
385        }
386        // If there's a fractional part it may be empty (e.g. `1.`) or digits.
387        if let Some(frac_digits) = frac {
388            if !frac_digits.bytes().all(|b| b.is_ascii_digit()) {
389                return false;
390            }
391        } else {
392            // No dot at all — only valid if there's an exponent (e.g. `1e10`).
393            // Without an exponent this is just an integer.
394            if exp_part.is_none() {
395                return false;
396            }
397        }
398        true
399    }
400}
401
/// Split `s` at its first `e` or `E`.
///
/// Returns `(mantissa, Some(exponent))` when a marker is found, where
/// `exponent` is everything after the marker (including any `+`/`-` sign), or
/// `(s, None)` when `s` contains no marker.  The marker itself is dropped.
fn split_exponent(s: &str) -> (&str, Option<&str>) {
    match s.split_once(['e', 'E']) {
        Some((mantissa, exponent)) => (mantissa, Some(exponent)),
        None => (s, None),
    }
}
408
/// Check the text after an exponent marker: an optional single `+`/`-`
/// followed by one or more ASCII digits, and nothing else.
fn is_valid_exponent_digits(exp: &str) -> bool {
    let digits = match exp.as_bytes() {
        [b'-' | b'+', rest @ ..] => rest,
        unsigned => unsigned,
    };
    !digits.is_empty() && digits.iter().all(u8::is_ascii_digit)
}
414
415// ---------------------------------------------------------------------------
416// JSON schema matchers (§10.2.2 tag resolution table)
417// ---------------------------------------------------------------------------
418
/// JSON null: the exact, lowercase text `null` and nothing else (§10.2.2).
///
/// The Core-only forms (`Null`, `NULL`, `~`, empty) do not match.
#[must_use]
pub fn is_json_null(value: &str) -> bool {
    matches!(value, "null")
}
424
/// JSON bool: the exact, lowercase text `true` or `false` (§10.2.2).
///
/// Capitalised and upper-case spellings do not match.
#[must_use]
pub fn is_json_bool(value: &str) -> bool {
    value == "true" || value == "false"
}
430
/// JSON int: `0 | -?[1-9][0-9]*` (§10.2.2).
///
/// Rejects a `+` sign, octal and hex forms, and leading zeros.  Zero is only
/// accepted as the bare text `0`; `-0` is not an int under this pattern.
#[must_use]
pub fn is_json_int(value: &str) -> bool {
    // `0` row: the literal text only, so a signed zero never matches.
    // `-?[1-9][0-9]*` row: optional minus, a non-zero first digit, then any
    // number of further digits.
    value == "0"
        || matches!(
            value.strip_prefix('-').unwrap_or(value).as_bytes(),
            [b'1'..=b'9', tail @ ..] if tail.iter().all(u8::is_ascii_digit)
        )
}
449
/// JSON float: `-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][-+]?[0-9]+)?` (§10.2.2).
///
/// Rejects a `+` sign, the leading-dot form, `.inf` and `.nan`.  Both the
/// fractional part and the exponent are optional in the pattern, so plain
/// digit runs such as `0`, `-0` and `42` match as well.
#[must_use]
pub fn is_json_float(value: &str) -> bool {
    /// Drop the leading run of ASCII digits (possibly empty) from `bytes`.
    fn skip_digits(bytes: &[u8]) -> &[u8] {
        let run = bytes.iter().take_while(|b| b.is_ascii_digit()).count();
        &bytes[run..]
    }

    // Optional minus; `+` is not part of the pattern.
    let unsigned = match value.as_bytes() {
        [b'-', rest @ ..] => rest,
        all => all,
    };

    // Integer part: a single `0`, or a non-zero digit followed by more digits.
    let after_int = match unsigned {
        [b'0', rest @ ..] => rest,
        [b'1'..=b'9', rest @ ..] => skip_digits(rest),
        _ => return false,
    };

    // Optional fraction: a dot followed by zero or more digits.
    let after_frac = match after_int {
        [b'.', rest @ ..] => skip_digits(rest),
        other => other,
    };

    // Whatever is left must be nothing, or a complete exponent.
    match after_frac {
        [] => true,
        [b'e' | b'E', exponent @ ..] => {
            let digits = match exponent {
                [b'-' | b'+', rest @ ..] => rest,
                unsigned_exp => unsigned_exp,
            };
            !digits.is_empty() && digits.iter().all(u8::is_ascii_digit)
        }
        _ => false,
    }
}
494
495// ---------------------------------------------------------------------------
496// Tests
497// ---------------------------------------------------------------------------
498
499#[cfg(test)]
500mod tests {
501    use super::*;
502    use crate::event::Chomp;
503    use rstest::rstest;
504
505    // ── 1. ResolvedTag::as_str() ───────────────────────────────────────────
506
507    #[rstest]
508    #[case::str_tag(ResolvedTag::Str, "tag:yaml.org,2002:str")]
509    #[case::int_tag(ResolvedTag::Int, "tag:yaml.org,2002:int")]
510    #[case::float_tag(ResolvedTag::Float, "tag:yaml.org,2002:float")]
511    #[case::bool_tag(ResolvedTag::Bool, "tag:yaml.org,2002:bool")]
512    #[case::null_tag(ResolvedTag::Null, "tag:yaml.org,2002:null")]
513    #[case::seq_tag(ResolvedTag::Seq, "tag:yaml.org,2002:seq")]
514    #[case::map_tag(ResolvedTag::Map, "tag:yaml.org,2002:map")]
515    fn resolved_tag_as_str_returns_uri(#[case] tag: ResolvedTag, #[case] expected: &str) {
516        assert_eq!(tag.as_str(), expected);
517    }
518
519    // ── 2. Core regex matchers ─────────────────────────────────────────────
520
521    // is_core_null — true
522
523    #[rstest]
524    #[case::null_lowercase("null")]
525    #[case::null_titlecase("Null")]
526    #[case::null_uppercase("NULL")]
527    #[case::tilde("~")]
528    #[case::empty("")]
529    fn is_core_null_returns_true(#[case] input: &str) {
530        assert!(is_core_null(input));
531    }
532
533    // is_core_null — false
534
535    #[rstest]
536    #[case::none_string("none")]
537    #[case::nil_string("nil")]
538    #[case::mixed_case_null("nUll")]
539    #[case::single_space(" ")]
540    #[case::json_null_inside_word("nullX")]
541    fn is_core_null_returns_false(#[case] input: &str) {
542        assert!(!is_core_null(input));
543    }
544
545    // is_core_bool — true
546
547    #[rstest]
548    #[case::true_lowercase("true")]
549    #[case::true_titlecase("True")]
550    #[case::true_uppercase("TRUE")]
551    #[case::false_lowercase("false")]
552    #[case::false_titlecase("False")]
553    #[case::false_uppercase("FALSE")]
554    fn is_core_bool_returns_true(#[case] input: &str) {
555        assert!(is_core_bool(input));
556    }
557
558    // is_core_bool — false
559
560    #[rstest]
561    #[case::yaml11_yes("yes")]
562    #[case::yaml11_no("no")]
563    #[case::yaml11_on("on")]
564    #[case::yaml11_off("off")]
565    #[case::mixed_case_true("tRue")]
566    #[case::integer_one("1")]
567    #[case::integer_zero("0")]
568    fn is_core_bool_returns_false(#[case] input: &str) {
569        assert!(!is_core_bool(input));
570    }
571
572    // is_core_int — true
573
574    #[rstest]
575    #[case::decimal_zero("0")]
576    #[case::decimal_positive("42")]
577    #[case::decimal_negative("-1")]
578    #[case::decimal_plus_prefix("+100")]
579    #[case::decimal_signed_negative("-42")]
580    #[case::decimal_signed_positive("+42")]
581    #[case::octal("0o17")]
582    #[case::octal_unsigned("0o10")]
583    #[case::hex_lower("0xff")]
584    #[case::hex_upper("0xFF")]
585    fn is_core_int_returns_true(#[case] input: &str) {
586        assert!(is_core_int(input));
587    }
588
589    // is_core_int — false
590
591    #[rstest]
592    #[case::leading_zeros("007")]
593    #[case::empty("")]
594    #[case::sign_only_plus("+")]
595    #[case::sign_only_minus("-")]
596    #[case::float_with_dot("3.14")]
597    #[case::float_exp("1e5")]
598    #[case::octal_prefix_only("0o")]
599    #[case::hex_prefix_only("0x")]
600    #[case::alpha_string("abc")]
601    // §10.3.2: sign is decimal-only; signed octal/hex must fall through to !!str
602    #[case::signed_octal_negative("-0o10")]
603    #[case::signed_octal_positive("+0o10")]
604    #[case::signed_hex_negative("-0xFF")]
605    #[case::signed_hex_positive("+0xFF")]
606    fn is_core_int_returns_false(#[case] input: &str) {
607        assert!(!is_core_int(input));
608    }
609
610    // is_core_float — true
611
612    #[rstest]
613    #[case::decimal_dot("3.14")]
614    #[case::decimal_no_integer_part(".5")]
615    #[case::exponent_only("1e10")]
616    #[case::exponent_negative("1.5E-3")]
617    #[case::positive_signed_float("+1.0")]
618    #[case::negative_float("-0.5")]
619    #[case::inf_lowercase(".inf")]
620    #[case::inf_titlecase(".Inf")]
621    #[case::inf_uppercase(".INF")]
622    #[case::neg_inf_lowercase("-.inf")]
623    #[case::neg_inf_titlecase("-.Inf")]
624    #[case::neg_inf_uppercase("-.INF")]
625    #[case::pos_inf("+.inf")]
626    #[case::nan_lowercase(".nan")]
627    #[case::nan_titlecase(".NaN")]
628    #[case::nan_uppercase(".NAN")]
629    fn is_core_float_returns_true(#[case] input: &str) {
630        assert!(is_core_float(input));
631    }
632
633    // is_core_float — false
634
635    #[rstest]
636    #[case::bare_integer("42")]
637    #[case::empty("")]
638    #[case::bare_inf_no_dot("inf")]
639    #[case::bare_nan_no_dot("nan")]
640    #[case::sign_only("+")]
641    #[case::dot_only(".")]
642    fn is_core_float_returns_false(#[case] input: &str) {
643        assert!(!is_core_float(input));
644    }
645
646    // ── 3. JSON regex matchers ─────────────────────────────────────────────
647
648    // is_json_null
649
650    #[test]
651    fn is_json_null_returns_true() {
652        assert!(is_json_null("null"));
653    }
654
655    #[rstest]
656    #[case::null_titlecase("Null")]
657    #[case::null_uppercase("NULL")]
658    #[case::tilde("~")]
659    #[case::empty("")]
660    fn is_json_null_returns_false(#[case] input: &str) {
661        assert!(!is_json_null(input));
662    }
663
664    // is_json_bool
665
666    #[rstest]
667    #[case::true_lowercase("true")]
668    #[case::false_lowercase("false")]
669    fn is_json_bool_returns_true(#[case] input: &str) {
670        assert!(is_json_bool(input));
671    }
672
673    #[rstest]
674    #[case::true_titlecase("True")]
675    #[case::true_uppercase("TRUE")]
676    #[case::false_titlecase("False")]
677    #[case::false_uppercase("FALSE")]
678    fn is_json_bool_returns_false(#[case] input: &str) {
679        assert!(!is_json_bool(input));
680    }
681
682    // is_json_int
683
684    #[rstest]
685    #[case::zero("0")]
686    #[case::positive_decimal("42")]
687    #[case::negative_decimal("-1")]
688    #[case::negative_multi("-100")]
689    #[case::large_negative("-9999")]
690    fn is_json_int_returns_true(#[case] input: &str) {
691        assert!(is_json_int(input));
692    }
693
694    #[rstest]
695    #[case::plus_prefix("+42")]
696    #[case::plus_zero("+0")]
697    #[case::minus_zero("-0")]
698    #[case::leading_zeros("007")]
699    #[case::octal("0o17")]
700    #[case::hex("0xFF")]
701    #[case::empty("")]
702    #[case::sign_only_plus("+")]
703    #[case::sign_only_minus("-")]
704    fn is_json_int_returns_false(#[case] input: &str) {
705        assert!(!is_json_int(input));
706    }
707
708    // is_json_float
709
710    #[rstest]
711    #[case::zero_float_simple("0.5")]
712    #[case::negative_with_decimal("-1.5")]
713    #[case::with_exponent("1e10")]
714    #[case::with_negative_exponent("-1.5e-3")]
715    // `-0` matches `-?(0)` with no fractional/exponent — valid JSON float.
716    #[case::minus_zero("-0")]
717    // bare `0` matches the integer part with no fractional or exponent.
718    #[case::zero_alone("0")]
719    fn is_json_float_returns_true(#[case] input: &str) {
720        assert!(is_json_float(input));
721    }
722
723    #[rstest]
724    #[case::plus_prefix("+1.5")]
725    #[case::inf_dot(".inf")]
726    #[case::nan_dot(".nan")]
727    #[case::leading_dot(".5")]
728    #[case::empty("")]
729    #[case::sign_only("-")]
730    fn is_json_float_returns_false(#[case] input: &str) {
731        assert!(!is_json_float(input));
732    }
733
734    // ── 4. resolve_scalar ─────────────────────────────────────────────────
735
736    // 4a. Failsafe schema
737
738    #[rstest]
739    #[case::plain_null(ScalarStyle::Plain, "null", None)]
740    #[case::single_quoted_true(ScalarStyle::SingleQuoted, "true", None)]
741    #[case::double_quoted_int(ScalarStyle::DoubleQuoted, "42", None)]
742    #[case::literal_block(ScalarStyle::Literal(Chomp::Clip), "hello", None)]
743    #[case::folded_block(ScalarStyle::Folded(Chomp::Strip), "world", None)]
744    fn resolve_scalar_failsafe_always_str(
745        #[case] style: ScalarStyle,
746        #[case] value: &str,
747        #[case] source_tag: Option<&str>,
748    ) {
749        assert_eq!(
750            resolve_scalar(Schema::Failsafe, style, value, source_tag),
751            Ok(Some(ResolvedTag::Str))
752        );
753    }
754
755    #[test]
756    fn resolve_scalar_failsafe_explicit_tag_passthrough() {
757        let result = resolve_scalar(
758            Schema::Failsafe,
759            ScalarStyle::Plain,
760            "null",
761            Some("tag:yaml.org,2002:str"),
762        );
763        assert_eq!(result, Ok(None));
764    }
765
766    // 4b. Core schema
767
768    #[rstest]
769    #[case::plain_null_lowercase(ScalarStyle::Plain, "null", None, ResolvedTag::Null)]
770    #[case::plain_null_tilde(ScalarStyle::Plain, "~", None, ResolvedTag::Null)]
771    #[case::plain_null_empty(ScalarStyle::Plain, "", None, ResolvedTag::Null)]
772    #[case::plain_bool_true_lower(ScalarStyle::Plain, "true", None, ResolvedTag::Bool)]
773    #[case::plain_bool_false_upper(ScalarStyle::Plain, "FALSE", None, ResolvedTag::Bool)]
774    #[case::plain_int_decimal(ScalarStyle::Plain, "42", None, ResolvedTag::Int)]
775    #[case::plain_int_octal(ScalarStyle::Plain, "0o17", None, ResolvedTag::Int)]
776    #[case::plain_int_hex(ScalarStyle::Plain, "0xFF", None, ResolvedTag::Int)]
777    #[case::plain_float_decimal(ScalarStyle::Plain, "3.14", None, ResolvedTag::Float)]
778    #[case::plain_float_inf(ScalarStyle::Plain, ".inf", None, ResolvedTag::Float)]
779    #[case::plain_float_nan(ScalarStyle::Plain, ".nan", None, ResolvedTag::Float)]
780    #[case::plain_unmatched_str(ScalarStyle::Plain, "hello", None, ResolvedTag::Str)]
781    #[case::plain_leading_zeros(ScalarStyle::Plain, "007", None, ResolvedTag::Str)]
782    // §10.3.2: signed octal/hex fall through to !!str (sign is decimal-only)
783    #[case::signed_octal_is_str(ScalarStyle::Plain, "-0o10", None, ResolvedTag::Str)]
784    #[case::signed_hex_is_str(ScalarStyle::Plain, "+0xFF", None, ResolvedTag::Str)]
785    #[case::single_quoted_null(ScalarStyle::SingleQuoted, "null", None, ResolvedTag::Str)]
786    #[case::double_quoted_true(ScalarStyle::DoubleQuoted, "true", None, ResolvedTag::Str)]
787    #[case::literal_any(ScalarStyle::Literal(Chomp::Clip), "42", None, ResolvedTag::Str)]
788    #[case::folded_any(ScalarStyle::Folded(Chomp::Keep), "null", None, ResolvedTag::Str)]
789    fn resolve_scalar_core(
790        #[case] style: ScalarStyle,
791        #[case] value: &str,
792        #[case] source_tag: Option<&str>,
793        #[case] expected: ResolvedTag,
794    ) {
795        assert_eq!(
796            resolve_scalar(Schema::Core, style, value, source_tag),
797            Ok(Some(expected))
798        );
799    }
800
801    #[test]
802    fn resolve_scalar_core_explicit_tag_passthrough() {
803        let result = resolve_scalar(
804            Schema::Core,
805            ScalarStyle::Plain,
806            "null",
807            Some("tag:yaml.org,2002:int"),
808        );
809        assert_eq!(result, Ok(None));
810    }
811
812    // 4c. JSON schema
813
814    #[rstest]
815    // null
816    #[case::plain_null_lowercase(ScalarStyle::Plain, "null", None, Ok(Some(ResolvedTag::Null)))]
817    // JSON rejects Core-only null forms
818    #[case::plain_null_tilde_rejected(ScalarStyle::Plain, "~", None, Err(UnresolvedScalar))]
819    #[case::plain_empty_rejected(ScalarStyle::Plain, "", None, Err(UnresolvedScalar))]
820    // bool
821    #[case::plain_bool_true_lower(ScalarStyle::Plain, "true", None, Ok(Some(ResolvedTag::Bool)))]
822    #[case::plain_bool_true_upper_rejected(ScalarStyle::Plain, "TRUE", None, Err(UnresolvedScalar))]
823    // int
824    #[case::plain_int_decimal(ScalarStyle::Plain, "42", None, Ok(Some(ResolvedTag::Int)))]
825    #[case::plain_int_zero(ScalarStyle::Plain, "0", None, Ok(Some(ResolvedTag::Int)))]
826    #[case::plain_int_negative(ScalarStyle::Plain, "-1", None, Ok(Some(ResolvedTag::Int)))]
827    #[case::plain_int_plus_rejected(ScalarStyle::Plain, "+42", None, Err(UnresolvedScalar))]
828    // -0: not a JSON int; dispatched to float (matches `-?(0)` with no fractional/exp)
829    #[case::plain_minus_zero_is_float(ScalarStyle::Plain, "-0", None, Ok(Some(ResolvedTag::Float)))]
830    #[case::plain_octal_rejected(ScalarStyle::Plain, "0o17", None, Err(UnresolvedScalar))]
831    #[case::plain_hex_rejected(ScalarStyle::Plain, "0xFF", None, Err(UnresolvedScalar))]
832    // float
833    #[case::plain_float_decimal(ScalarStyle::Plain, "1.5", None, Ok(Some(ResolvedTag::Float)))]
834    #[case::plain_float_inf_rejected(ScalarStyle::Plain, ".inf", None, Err(UnresolvedScalar))]
835    #[case::plain_float_nan_rejected(ScalarStyle::Plain, ".nan", None, Err(UnresolvedScalar))]
836    #[case::plain_float_plus_rejected(ScalarStyle::Plain, "+1.5", None, Err(UnresolvedScalar))]
837    // unmatched
838    #[case::plain_unmatched_rejected(ScalarStyle::Plain, "hello", None, Err(UnresolvedScalar))]
839    // non-plain styles → Str (no pattern matching)
840    #[case::single_quoted_becomes_str(
841        ScalarStyle::SingleQuoted,
842        "null",
843        None,
844        Ok(Some(ResolvedTag::Str))
845    )]
846    #[case::double_quoted_becomes_str(
847        ScalarStyle::DoubleQuoted,
848        "true",
849        None,
850        Ok(Some(ResolvedTag::Str))
851    )]
852    #[case::literal_becomes_str(
853        ScalarStyle::Literal(Chomp::Clip),
854        "42",
855        None,
856        Ok(Some(ResolvedTag::Str))
857    )]
858    #[case::folded_becomes_str(
859        ScalarStyle::Folded(Chomp::Strip),
860        "null",
861        None,
862        Ok(Some(ResolvedTag::Str))
863    )]
864    fn resolve_scalar_json(
865        #[case] style: ScalarStyle,
866        #[case] value: &str,
867        #[case] source_tag: Option<&str>,
868        #[case] expected: Result<Option<ResolvedTag>, UnresolvedScalar>,
869    ) {
870        assert_eq!(
871            resolve_scalar(Schema::Json, style, value, source_tag),
872            expected
873        );
874    }
875
// With an explicit source tag (`!custom`) the JSON schema must not resolve
// anything itself: the expected answer is `Ok(None)`, even for text that
// would otherwise match the null pattern.
#[test]
fn resolve_scalar_json_explicit_tag_passthrough() {
    assert_eq!(
        resolve_scalar(Schema::Json, ScalarStyle::Plain, "null", Some("!custom")),
        Ok(None)
    );
}
881
882    // 4d. source_tag passthrough — cross-schema
883
// Failsafe schema: a scalar that already carries a source tag is expected to
// yield `Ok(None)` rather than a schema-resolved tag.
#[test]
fn resolve_scalar_explicit_tag_returns_none_failsafe() {
    let explicit_tag = Some("anything");
    let resolved = resolve_scalar(Schema::Failsafe, ScalarStyle::Plain, "null", explicit_tag);
    assert_eq!(resolved, Ok(None));
}
896
// JSON schema: a scalar that already carries a source tag is expected to
// yield `Ok(None)` rather than a schema-resolved tag.
#[test]
fn resolve_scalar_explicit_tag_returns_none_json() {
    let explicit_tag = Some("anything");
    let resolved = resolve_scalar(Schema::Json, ScalarStyle::Plain, "null", explicit_tag);
    assert_eq!(resolved, Ok(None));
}
904
// Core schema: a scalar that already carries a source tag is expected to
// yield `Ok(None)` rather than a schema-resolved tag.
#[test]
fn resolve_scalar_explicit_tag_returns_none_core() {
    let explicit_tag = Some("anything");
    let resolved = resolve_scalar(Schema::Core, ScalarStyle::Plain, "null", explicit_tag);
    assert_eq!(resolved, Ok(None));
}
912
913    // ── 5. resolve_collection ─────────────────────────────────────────────
914
915    #[rstest]
916    #[case::failsafe_sequence_no_tag(
917        Schema::Failsafe,
918        CollectionKind::Sequence,
919        None,
920        Some(ResolvedTag::Seq)
921    )]
922    #[case::failsafe_mapping_no_tag(
923        Schema::Failsafe,
924        CollectionKind::Mapping,
925        None,
926        Some(ResolvedTag::Map)
927    )]
928    #[case::json_sequence_no_tag(
929        Schema::Json,
930        CollectionKind::Sequence,
931        None,
932        Some(ResolvedTag::Seq)
933    )]
934    #[case::json_mapping_no_tag(
935        Schema::Json,
936        CollectionKind::Mapping,
937        None,
938        Some(ResolvedTag::Map)
939    )]
940    #[case::core_sequence_no_tag(
941        Schema::Core,
942        CollectionKind::Sequence,
943        None,
944        Some(ResolvedTag::Seq)
945    )]
946    #[case::core_mapping_no_tag(
947        Schema::Core,
948        CollectionKind::Mapping,
949        None,
950        Some(ResolvedTag::Map)
951    )]
952    #[case::failsafe_sequence_explicit_tag(
953        Schema::Failsafe,
954        CollectionKind::Sequence,
955        Some("!custom"),
956        None
957    )]
958    #[case::failsafe_mapping_explicit_tag(
959        Schema::Failsafe,
960        CollectionKind::Mapping,
961        Some("tag:yaml.org,2002:map"),
962        None
963    )]
964    #[case::core_sequence_explicit_tag(Schema::Core, CollectionKind::Sequence, Some("!seq"), None)]
965    #[case::json_mapping_explicit_tag(Schema::Json, CollectionKind::Mapping, Some("!map"), None)]
966    fn resolve_collection_dispatch(
967        #[case] schema: Schema,
968        #[case] kind: CollectionKind,
969        #[case] source_tag: Option<&str>,
970        #[case] expected: Option<ResolvedTag>,
971    ) {
972        assert_eq!(resolve_collection(schema, kind, source_tag), expected);
973    }
974}