// serde_saphyr/de/input_source.rs

use std::io::Read;
2
/// Owned input that can be fed into the YAML parser.
///
/// This is primarily used by the include resolver: it can return either fully-owned
/// in-memory text, or a fully-owned streaming reader.
pub enum InputSource {
    /// Owned text.
    Text(String),
    /// Owned YAML text together with the name of an anchor to extract from it.
    ///
    /// This is mainly intended for resolvers that support include specs such as
    /// `path/to/file.yaml#anchor_name`. In that case, the resolver still receives the full
    /// include spec via [`IncludeRequest::spec`], splits the file part from the fragment itself,
    /// reads the target document, and returns:
    ///
    /// ```rust
    /// # use serde_saphyr::InputSource;
    /// let source = InputSource::AnchoredText {
    ///     text: "defaults: &defaults\n  enabled: true\nfeature: *defaults\n".to_owned(),
    ///     anchor: "defaults".to_owned(),
    /// };
    /// ```
    ///
    /// During parsing, `serde-saphyr` will parse `text`, find the node anchored as `&defaults`,
    /// and replay only that anchored node as the included value. Conceptually, this makes:
    ///
    /// ```yaml
    /// settings: !include config.yaml#defaults
    /// ```
    ///
    /// behave as if `settings` directly contained the YAML node anchored as `&defaults` inside
    /// `config.yaml`.
    ///
    /// Use [`InputSource::Text`] when the whole document should be included, and use
    /// [`InputSource::AnchoredText`] only when you want the include to resolve to a specific
    /// anchored fragment.
    AnchoredText {
        /// Full YAML text of the document that contains the anchor.
        text: String,
        /// Name of the anchor to extract, written without the leading `&`
        /// (`"defaults"` for a node declared as `&defaults`).
        anchor: String,
    },
    /// Owned reader (streaming).
    ///
    /// The `'static` bound means the boxed reader cannot borrow from the caller.
    Reader(Box<dyn Read + 'static>),
}
42
43impl std::fmt::Debug for InputSource {
44    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
45        match self {
46            Self::Text(text) => f.debug_tuple("Text").field(text).finish(),
47            Self::AnchoredText { text, anchor } => f
48                .debug_struct("AnchoredText")
49                .field("anchor", anchor)
50                .field("text", text)
51                .finish(),
52            Self::Reader(_) => f.write_str("Reader(..)"),
53        }
54    }
55}
56
57/// A resolved include containing the source identity and the content.
58#[derive(Debug)]
59pub struct ResolvedInclude {
60    /// The canonical identity of the included source, used for cycle detection and absolute paths.
61    pub id: String,
62    /// The display name of the included source, used for error messages.
63    pub name: String,
64    /// The actual content to parse.
65    pub source: InputSource,
66}
67
/// Specific problems encountered during file include resolution.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must keep a wildcard arm
/// when matching on it.
#[derive(Debug)]
#[non_exhaustive]
pub enum ResolveProblem {
    /// Failed to canonicalize the include target path.
    ResolveFailed {
        /// The include spec that was being resolved.
        spec: String,
        /// The base directory involved in the failed resolution.
        // NOTE(review): presumably the directory `spec` was resolved against — confirm at the
        // construction site.
        base_dir: String,
        /// The underlying I/O error.
        err: std::io::Error,
    },
    /// The include target is not a regular file.
    TargetNotRegularFile {
        /// The offending include target.
        target: String,
    },
    /// The include target resolves to the configured root file itself (cyclic include).
    TargetIsRootFile {
        /// The include spec that resolved to the root file.
        spec: String,
    },
    /// The parent include id was not an absolute canonical path.
    ParentIdNotAbsoluteCanonical {
        /// The rejected parent include id.
        parent_id: String,
    },
    /// Failed to resolve the parent include source.
    ParentResolveFailed {
        /// The id of the parent include that could not be resolved.
        parent_id: String,
        /// The name of the source the include was requested from.
        from_name: String,
        /// The underlying I/O error.
        err: std::io::Error,
    },
    /// The parent include is not a regular file.
    ParentNotRegularFile {
        /// The offending parent include.
        parent: String,
    },
    /// The parent include does not have a parent directory.
    ParentHasNoDirectory {
        /// The offending parent include.
        parent: String,
    },
    /// The include resolves outside the configured root directory.
    ResolvesOutsideRoot {
        /// The include spec that escaped the root.
        spec: String,
        /// The configured root directory.
        root: String,
    },
    /// The include traverses a symlink, which is disabled by policy.
    TraversesSymlink {
        /// The include spec that traversed a symlink.
        spec: String,
    },
    /// Absolute include paths are not allowed.
    AbsolutePathNotAllowed {
        /// The rejected absolute include spec.
        spec: String,
    },
    /// The include path is empty.
    EmptyPath,
    /// The include target does not have a valid YAML extension (.yml or .yaml).
    InvalidExtension {
        /// The include spec with the rejected extension.
        spec: String,
    },
    /// The include target is a hidden file (starts with a dot).
    HiddenFile {
        /// The include spec naming the hidden file.
        spec: String,
    },
    /// The include fragment is empty.
    EmptyFragment,
    /// The include fragment contains a '#' character.
    FragmentContainsHash {
        /// The include spec with the malformed fragment.
        spec: String,
    },
}
111
112/// Error type returned by user-provided include resolvers.
113#[derive(Debug)]
114#[non_exhaustive]
115pub enum IncludeResolveError {
116    Io(std::io::Error),
117    Message(String),
118    SizeLimitExceeded(usize, usize),
119    FileInclude(Box<ResolveProblem>),
120}
121
122impl From<std::io::Error> for IncludeResolveError {
123    fn from(value: std::io::Error) -> Self {
124        Self::Io(value)
125    }
126}
127
128/// A request passed to the include resolver to resolve an include directive.
129pub struct IncludeRequest<'a> {
130    /// The include specification (e.g. the path or URL).
131    pub spec: &'a str,
132    /// The name of the file or source currently being parsed (top of the include stack).
133    pub from_name: &'a str,
134    /// The canonical identity of the source currently being parsed, or None for the root parser.
135    pub from_id: Option<&'a str>,
136    /// The full chain of inclusions leading to this request, with the current file at the end.
137    pub stack: Vec<String>,
138    /// Remaining decoded byte quota available for additional reader-backed input, if configured.
139    pub size_remaining: Option<usize>,
140    /// The location in the source file where the include was requested.
141    pub location: crate::Location,
142}
143
144/// Callback used to resolve `!include` directives during parsing.
145///
146/// The resolver receives an [`IncludeRequest`] describing what was requested, from which
147/// source it originated, and where in the source file the directive was encountered. It must
148/// either return a [`ResolvedInclude`] with a stable `id`, human-friendly `name`, and the
149/// replacement [`InputSource`], or fail with [`IncludeResolveError`].
150///
151/// The `id` should uniquely identify the underlying resource after any normalization you need
152/// (for example, a canonical filesystem path or a normalized URL). `serde-saphyr` uses this
153/// identifier for include-stack tracking and cycle detection. The `name` is intended for error
154/// messages and can be more user-friendly.
155///
156/// Resolvers may return:
157/// - [`InputSource::Text`] for ordinary in-memory YAML,
158/// - [`InputSource::AnchoredText`] when the include should behave as if a specific anchor was
159///   the first parsed node, or
160/// - [`InputSource::Reader`] when content should be streamed from an owned reader.
161///
162/// A resolver is invoked lazily, when a `!include` tag is encountered. Because the type is
163/// `FnMut`, the callback may keep state such as caches, metrics, or a virtual file map.
164///
165/// ```rust
166/// # #[cfg(feature = "include")]
167/// # {
168/// use serde::Deserialize;
169/// use serde_saphyr::{
170///     from_str_with_options, options, IncludeRequest, IncludeResolveError, InputSource,
171///     ResolvedInclude,
172/// };
173///
174/// #[derive(Debug, Deserialize, PartialEq)]
175/// struct Config {
176///     users: Vec<User>,
177/// }
178///
179/// #[derive(Debug, Deserialize, PartialEq)]
180/// struct User {
181///     name: String,
182/// }
183///
184/// let root_yaml = "users: !include virtual://users.yaml\n";
185/// let users_yaml = "- name: Alice\n- name: Bob\n";
186///
187/// let options = options! {}.with_include_resolver(|req: IncludeRequest<'_>| {
188///     assert_eq!(req.spec, "virtual://users.yaml");
189///     assert_eq!(req.from_name, "<input>");
190///
191///     if req.spec == "virtual://users.yaml" {
192///         Ok(ResolvedInclude {
193///             id: req.spec.to_owned(),
194///             name: "virtual users".to_owned(),
195///             source: InputSource::from_string(users_yaml.to_owned()),
196///         })
197///     } else {
198///         Err(IncludeResolveError::Message(format!("unknown include: {}", req.spec)))
199///     }
200/// });
201///
202/// let config: Config = from_str_with_options(root_yaml, options).unwrap();
203/// assert_eq!(config.users.len(), 2);
204/// assert_eq!(config.users[0].name, "Alice");
205/// # }
206/// ```
207pub type IncludeResolver<'a> =
208    dyn FnMut(IncludeRequest<'_>) -> Result<ResolvedInclude, IncludeResolveError> + 'a;
209
210impl InputSource {
211    #[inline]
212    pub fn from_string(s: String) -> Self {
213        Self::Text(s)
214    }
215
216    #[inline]
217    pub fn from_reader<R>(r: R) -> Self
218    where
219        R: Read + 'static,
220    {
221        Self::Reader(Box::new(r))
222    }
223}