// serde-saphyr 0.0.19
//
// YAML (de)serializer for Serde, emphasizing panic-free parsing and good error reporting
// Documentation
//! Span-aware wrapper types.
//!
//! `Spanned<T>` lets you deserialize a value together with the source location
//! (line/column) of the YAML node it came from.
//!
//! This is especially useful for config validation errors, where you want to
//! point at the exact place in the YAML. Many configuration errors are not
//! "invalid YAML" but rather "valid YAML, but still an invalid value". Using
//! `Spanned` lets you report where the invalid value comes from.
//!
//! ```rust
//! use serde::Deserialize;
//!
//! #[derive(Debug, Deserialize)]
//! struct Cfg {
//!     timeout: serde_saphyr::Spanned<u64>,
//! }
//!
//! let cfg: Cfg = serde_saphyr::from_str("timeout: 5\n").unwrap();
//! assert_eq!(cfg.timeout.value, 5);
//! assert_eq!(cfg.timeout.referenced.line(), 1);
//! assert_eq!(cfg.timeout.referenced.column(), 10);
//! ```

use serde::de::{self, Deserializer};
use serde::{Deserialize, Serialize};

use crate::Location;

/// A value paired with source locations describing where it came from. Spanned locations
/// are specified in character positions and, when possible, in byte offsets as well (byte
/// offsets are available for a string source but not for a reader).
///
/// # Example
///
/// ```rust
/// use serde::Deserialize;
///
/// #[derive(Debug, Deserialize)]
/// struct Cfg {
///     timeout: serde_saphyr::Spanned<u64>,
/// }
///
/// let cfg: Cfg = serde_saphyr::from_str("timeout: 5\n").unwrap();
/// assert_eq!(cfg.timeout.value, 5);
/// assert_eq!(cfg.timeout.referenced.line(), 1);
/// assert_eq!(cfg.timeout.referenced.column(), 10);
/// ```
///
/// # Location semantics for YAML aliases and merges
///
/// `Spanned<T>` exposes two locations:
///
/// - `referenced`: where the value is referenced/used in the YAML.
///   - For aliases (`*a`): this is the location of the alias token.
///   - For merge-derived values (`<<`): this is the location of the merge entry
///     (typically the `<<: *a` site).
/// - `defined`: where the value is defined in YAML.
///   - For plain values: equals `referenced`.
///   - For aliases: points to the anchored definition.
///   - For merge-derived values: points to the originating scalar in the merged
///     mapping.
///
/// # Limitation with certain enum representations
///
/// `Spanned<T>` **cannot** be used inside variants of enums that use:
/// - `#[serde(untagged)]` - untagged enums
/// - `#[serde(tag = "...")]` - internally tagged enums
///
/// This is a fundamental limitation of how serde handles these enum types: serde
/// buffers the content and replays it through a generic `ContentDeserializer`
/// that doesn't recognize the special `__yaml_spanned` marker.
///
/// ## Workaround: Wrap the entire enum
///
/// Instead of putting `Spanned<T>` inside each variant, wrap the whole enum:
///
/// ```rust
/// use serde::Deserialize;
/// use serde_saphyr::Spanned;
///
/// #[derive(Debug, Deserialize)]
/// #[serde(untagged)]
/// pub enum Payload {
///     StringVariant { message: String },
///     IntVariant { count: u32 },
/// }
///
/// // Use Spanned<Payload> instead of Spanned<T> inside variants
/// let yaml = "message: hello";
/// let result: Spanned<Payload> = serde_saphyr::from_str(yaml).unwrap();
/// assert_eq!(result.referenced.line(), 1);
/// ```
///
/// ## Alternative: Use externally tagged enums (serde default)
///
/// Externally tagged enums (the default) work with `Spanned<T>` inside variants:
///
/// ```rust
/// use serde::Deserialize;
/// use serde_saphyr::Spanned;
///
/// #[derive(Debug, Deserialize)]
/// pub enum Payload {
///     StringVariant { message: Spanned<String> },
///     IntVariant { count: Spanned<u32> },
/// }
///
/// let yaml = "StringVariant:\n  message: hello";
/// let result: Payload = serde_saphyr::from_str(yaml).unwrap();
/// match result {
///     Payload::StringVariant { message } => {
///         assert_eq!(&message.value, "hello");
///         assert_eq!(message.referenced.line(), 2);
///     }
///     _ => panic!("Expected StringVariant"),
/// }
/// ```
///
/// ## Alternative: Use adjacently tagged enums
///
/// Adjacently tagged enums also work with `Spanned<T>` inside variants:
///
/// ```rust
/// use serde::Deserialize;
/// use serde_saphyr::Spanned;
///
/// #[derive(Debug, Deserialize)]
/// #[serde(tag = "type", content = "data")]
/// pub enum Payload {
///     StringVariant { message: Spanned<String> },
///     IntVariant { count: Spanned<u32> },
/// }
///
/// let yaml = "type: StringVariant\ndata:\n  message: hello";
/// let result: Payload = serde_saphyr::from_str(yaml).unwrap();
/// match result {
///     Payload::StringVariant { message } => {
///         assert_eq!(&message.value, "hello");
///         assert_eq!(message.referenced.line(), 3);
///     }
///     _ => panic!("Expected StringVariant"),
/// }
/// ```
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Spanned<T> {
    /// The deserialized value itself.
    pub value: T,
    /// Where the value is referenced/used in the YAML. For an alias this is the
    /// alias token; for a merge-derived value it is the merge entry.
    pub referenced: Location,
    /// Where the value is defined in the YAML. Equals `referenced` for plain
    /// values; for an alias it points to the anchored definition, and for a
    /// merge-derived value to the originating scalar in the merged mapping.
    pub defined: Location,
}

impl<T> Spanned<T> {
    pub const fn new(value: T, referenced: Location, defined: Location) -> Self {
        Self {
            value,
            referenced,
            defined,
        }
    }
}

impl<'de, T> Deserialize<'de> for Spanned<T>
where
    T: Deserialize<'de>,
{
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        struct SpannedVisitor<T>(std::marker::PhantomData<T>);

        impl<'de, T> de::Visitor<'de> for SpannedVisitor<T>
        where
            T: Deserialize<'de>,
        {
            type Value = Spanned<T>;

            fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                f.write_str("a span-aware newtype wrapper")
            }

            fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
            where
                D: Deserializer<'de>,
            {
                #[derive(Deserialize)]
                struct Repr<T> {
                    value: T,
                    referenced: Location,
                    defined: Location,
                }

                Repr::<T>::deserialize(deserializer)
                    .map_err(|e| {
                        let msg = e.to_string();
                        // Detect the specific case where Spanned<T> is used inside an untagged
                        // or internally tagged enum. In that case, serde's ContentDeserializer
                        // doesn't provide the `referenced` and `defined` fields that our
                        // YAML deserializer normally injects.
                        if msg.contains("missing field `referenced`")
                            || msg.contains("missing field `defined`")
                        {
                            de::Error::custom(
                                "Spanned<T> cannot be used inside #[serde(untagged)] or \
                                 #[serde(tag = \"...\")] enum variants. Consider wrapping the \
                                 entire enum with Spanned<YourEnum>, or use externally tagged \
                                 or adjacently tagged enums instead."
                            )
                        } else {
                            e
                        }
                    })
                    .map(|repr| Spanned::new(repr.value, repr.referenced, repr.defined))
            }
        }

        deserializer
            .deserialize_newtype_struct("__yaml_spanned", SpannedVisitor(std::marker::PhantomData))
    }
}

impl<T> Serialize for Spanned<T>
where
    T: Serialize,
{
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // `Spanned<T>` is a deserialization helper that records source locations.
        // When serializing, we emit the wrapped value only.
        self.value.serialize(serializer)
    }
}