Skip to main content

serde_saphyr/de/
input_source.rs

1use std::io::Read;
2
/// An owned source of YAML input that can be handed to the parser.
///
/// The include resolver is the main producer of this type: it returns either text that is held
/// entirely in memory, or a streaming reader that it fully owns.
#[non_exhaustive]
pub enum InputSource {
    /// YAML text held in an owned `String`.
    Text(String),
    /// Owned YAML text paired with the name of the anchor that should be extracted from it.
    ///
    /// Meant mostly for resolvers that accept include specs of the form
    /// `path/to/file.yaml#anchor_name`. The resolver still receives the complete include spec
    /// through [`IncludeRequest::spec`]; it separates the file part from the fragment on its
    /// own, reads the target document, and returns:
    ///
    /// ```rust
    /// # use serde_saphyr::InputSource;
    /// let source = InputSource::AnchoredText {
    ///     text: "defaults: &defaults\n  enabled: true\nfeature: *defaults\n".to_owned(),
    ///     anchor: "defaults".to_owned(),
    /// };
    /// ```
    ///
    /// While parsing, `serde-saphyr` parses `text`, locates the node anchored as `&defaults`,
    /// and replays only that anchored node as the included value. Conceptually, this makes:
    ///
    /// ```yaml
    /// settings: !include config.yaml#defaults
    /// ```
    ///
    /// behave as though `settings` directly held the YAML node anchored as `&defaults` inside
    /// `config.yaml`.
    ///
    /// Reach for [`InputSource::Text`] when the whole document should be included; pick
    /// [`InputSource::AnchoredText`] only when the include must resolve to one specific
    /// anchored fragment.
    AnchoredText {
        /// The full YAML text in which the anchor is looked up.
        text: String,
        /// The anchor name, written without the leading `&` (see the example above).
        anchor: String,
    },
    /// Owned reader (streaming).
    // `Box<dyn Read>` in this position already implies the `'static` object bound.
    Reader(Box<dyn Read>),
}
43
44impl std::fmt::Debug for InputSource {
45    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46        match self {
47            Self::Text(text) => f.debug_tuple("Text").field(text).finish(),
48            Self::AnchoredText { text, anchor } => f
49                .debug_struct("AnchoredText")
50                .field("anchor", anchor)
51                .field("text", text)
52                .finish(),
53            Self::Reader(_) => f.write_str("Reader(..)"),
54        }
55    }
56}
57
58/// A resolved include containing the source identity and the content.
59#[derive(Debug)]
60pub struct ResolvedInclude {
61    /// The canonical identity of the included source, used for cycle detection and absolute paths.
62    pub id: String,
63    /// The display name of the included source, used for error messages.
64    pub name: String,
65    /// The actual content to parse.
66    pub source: InputSource,
67}
68
/// The specific ways in which resolving a file include can go wrong.
#[derive(Debug)]
#[non_exhaustive]
pub enum ResolveProblem {
    /// Canonicalizing the include target path failed.
    ResolveFailed {
        spec: String,
        base_dir: String,
        err: std::io::Error,
    },
    /// The include target exists but is not a regular file.
    TargetNotRegularFile { target: String },
    /// The include target turned out to be the configured root file itself (a cyclic include).
    TargetIsRootFile { spec: String },
    /// The id of the parent include was not an absolute canonical path.
    ParentIdNotAbsoluteCanonical { parent_id: String },
    /// Resolving the parent include source failed.
    ParentResolveFailed {
        parent_id: String,
        from_name: String,
        err: std::io::Error,
    },
    /// The parent include is not a regular file.
    ParentNotRegularFile { parent: String },
    /// The parent include has no parent directory.
    ParentHasNoDirectory { parent: String },
    /// The include resolves to a location outside the configured root directory.
    ResolvesOutsideRoot { spec: String, root: String },
    /// The include goes through a symlink, and policy has symlink traversal disabled.
    TraversesSymlink { spec: String },
    /// An absolute include path was given, which is not allowed.
    AbsolutePathNotAllowed { spec: String },
    /// The include path is empty.
    EmptyPath,
    /// The include target lacks a valid YAML extension (.yml or .yaml).
    InvalidExtension { spec: String },
    /// The include target is a hidden file (its name starts with a dot).
    HiddenFile { spec: String },
    /// The include fragment is empty.
    EmptyFragment,
    /// The include fragment contains a '#' character.
    FragmentContainsHash { spec: String },
}
112
113/// Error type returned by user-provided include resolvers.
114#[derive(Debug)]
115#[non_exhaustive]
116pub enum IncludeResolveError {
117    Io(std::io::Error),
118    Message(String),
119    SizeLimitExceeded(usize, usize),
120    FileInclude(Box<ResolveProblem>),
121}
122
123impl From<std::io::Error> for IncludeResolveError {
124    fn from(value: std::io::Error) -> Self {
125        Self::Io(value)
126    }
127}
128
129/// A request passed to the include resolver to resolve an include directive.
130pub struct IncludeRequest<'a> {
131    /// The include specification (e.g. the path or URL).
132    pub spec: &'a str,
133    /// The name of the file or source currently being parsed (top of the include stack).
134    pub from_name: &'a str,
135    /// The canonical identity of the source currently being parsed, or None for the root parser.
136    pub from_id: Option<&'a str>,
137    /// The full chain of inclusions leading to this request, with the current file at the end.
138    pub stack: Vec<String>,
139    /// Remaining decoded byte quota available for additional reader-backed input, if configured.
140    pub size_remaining: Option<usize>,
141    /// The location in the source file where the include was requested.
142    pub location: crate::Location,
143}
144
145/// Callback used to resolve `!include` directives during parsing.
146///
147/// The resolver receives an [`IncludeRequest`] describing what was requested, from which
148/// source it originated, and where in the source file the directive was encountered. It must
149/// either return a [`ResolvedInclude`] with a stable `id`, human-friendly `name`, and the
150/// replacement [`InputSource`], or fail with [`IncludeResolveError`].
151///
152/// The `id` should uniquely identify the underlying resource after any normalization you need
153/// (for example, a canonical filesystem path or a normalized URL). `serde-saphyr` uses this
154/// identifier for include-stack tracking and cycle detection. The `name` is intended for error
155/// messages and can be more user-friendly.
156///
157/// Resolvers may return:
158/// - [`InputSource::Text`] for ordinary in-memory YAML,
159/// - [`InputSource::AnchoredText`] when the include should behave as if a specific anchor was
160///   the first parsed node, or
161/// - [`InputSource::Reader`] when content should be streamed from an owned reader.
162///
163/// A resolver is invoked lazily, when a `!include` tag is encountered. Because the type is
164/// `FnMut`, the callback may keep state such as caches, metrics, or a virtual file map.
165///
166/// ```rust
167/// # #[cfg(feature = "include")]
168/// # {
169/// use serde::Deserialize;
170/// use serde_saphyr::{
171///     from_str_with_options, options, IncludeRequest, IncludeResolveError, InputSource,
172///     ResolvedInclude,
173/// };
174///
175/// #[derive(Debug, Deserialize, PartialEq)]
176/// struct Config {
177///     users: Vec<User>,
178/// }
179///
180/// #[derive(Debug, Deserialize, PartialEq)]
181/// struct User {
182///     name: String,
183/// }
184///
185/// let root_yaml = "users: !include virtual://users.yaml\n";
186/// let users_yaml = "- name: Alice\n- name: Bob\n";
187///
188/// let options = options! {}.with_include_resolver(|req: IncludeRequest<'_>| {
189///     assert_eq!(req.spec, "virtual://users.yaml");
190///     assert_eq!(req.from_name, "<input>");
191///
192///     if req.spec == "virtual://users.yaml" {
193///         Ok(ResolvedInclude {
194///             id: req.spec.to_owned(),
195///             name: "virtual users".to_owned(),
196///             source: InputSource::from_string(users_yaml.to_owned()),
197///         })
198///     } else {
199///         Err(IncludeResolveError::Message(format!("unknown include: {}", req.spec)))
200///     }
201/// });
202///
203/// let config: Config = from_str_with_options(root_yaml, options).unwrap();
204/// assert_eq!(config.users.len(), 2);
205/// assert_eq!(config.users[0].name, "Alice");
206/// # }
207/// ```
208pub type IncludeResolver<'a> =
209    dyn FnMut(IncludeRequest<'_>) -> Result<ResolvedInclude, IncludeResolveError> + 'a;
210
211impl InputSource {
212    #[inline]
213    pub fn from_string(s: String) -> Self {
214        Self::Text(s)
215    }
216
217    #[inline]
218    pub fn from_reader<R>(r: R) -> Self
219    where
220        R: Read + 'static,
221    {
222        Self::Reader(Box::new(r))
223    }
224}
225
#[cfg(test)]
mod tests {
    use super::InputSource;

    /// Checks the exact `Debug` rendering of every `InputSource` variant.
    #[test]
    fn debug_formats_each_input_source_variant() {
        // One (value, expected rendering) pair per variant, checked in a single loop.
        let cases = [
            (
                InputSource::from_string("hello".to_owned()),
                "Text(\"hello\")",
            ),
            (
                InputSource::AnchoredText {
                    text: "body: true\n".to_owned(),
                    anchor: "defaults".to_owned(),
                },
                "AnchoredText { anchor: \"defaults\", text: \"body: true\\n\" }",
            ),
            (
                InputSource::from_reader(std::io::Cursor::new(b"stream".to_vec())),
                "Reader(..)",
            ),
        ];

        for (source, expected) in cases {
            assert_eq!(format!("{source:?}"), expected);
        }
    }
}