serde_saphyr/de/input_source.rs
1use std::io::Read;
2
/// Owned input that can be fed into the YAML parser.
///
/// This is primarily used by the include resolver: it can return either fully-owned
/// in-memory text, or a fully-owned streaming reader.
///
/// The enum is `#[non_exhaustive]`, so `match` expressions written outside this crate need a
/// wildcard arm.
#[non_exhaustive]
pub enum InputSource {
    /// Owned text.
    Text(String),
    /// Owned YAML text together with the name of an anchor to extract from it.
    ///
    /// Fields:
    /// - `text`: the complete YAML document that contains the anchor.
    /// - `anchor`: the anchor name without the leading `&` (for example `defaults` for a node
    ///   written as `&defaults`).
    ///
    /// This is mainly intended for resolvers that support include specs such as
    /// `path/to/file.yaml#anchor_name`. In that case, the resolver still receives the full
    /// include spec via [`IncludeRequest::spec`], splits the file part from the fragment itself,
    /// reads the target document, and returns:
    ///
    /// ```rust
    /// # use serde_saphyr::InputSource;
    /// let source = InputSource::AnchoredText {
    ///     text: "defaults: &defaults\n enabled: true\nfeature: *defaults\n".to_owned(),
    ///     anchor: "defaults".to_owned(),
    /// };
    /// ```
    ///
    /// During parsing, `serde-saphyr` will parse `text`, find the node anchored as `&defaults`,
    /// and replay only that anchored node as the included value. Conceptually, this makes:
    ///
    /// ```yaml
    /// settings: !include config.yaml#defaults
    /// ```
    ///
    /// behave as if `settings` directly contained the YAML node anchored as `&defaults` inside
    /// `config.yaml`.
    ///
    /// Use [`InputSource::Text`] when the whole document should be included, and use
    /// [`InputSource::AnchoredText`] only when you want the include to resolve to a specific
    /// anchored fragment.
    AnchoredText { text: String, anchor: String },
    /// Owned reader (streaming).
    ///
    /// The reader is stored as a boxed trait object, which is why it must be `'static`.
    Reader(Box<dyn Read + 'static>),
}
43
44impl std::fmt::Debug for InputSource {
45 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46 match self {
47 Self::Text(text) => f.debug_tuple("Text").field(text).finish(),
48 Self::AnchoredText { text, anchor } => f
49 .debug_struct("AnchoredText")
50 .field("anchor", anchor)
51 .field("text", text)
52 .finish(),
53 Self::Reader(_) => f.write_str("Reader(..)"),
54 }
55 }
56}
57
/// A resolved include containing the source identity and the content.
///
/// This is the success value produced by an include resolver (see [`IncludeResolver`]).
/// All fields are owned, so the value does not borrow from the request that produced it.
#[derive(Debug)]
pub struct ResolvedInclude {
    /// The canonical identity of the included source, used for cycle detection and absolute paths.
    pub id: String,
    /// The display name of the included source, used for error messages.
    pub name: String,
    /// The actual content to parse.
    pub source: InputSource,
}
68
/// Specific problems encountered during file include resolution.
///
/// Variants carry the offending include spec, path, or parent identity as owned strings; the
/// two I/O-backed variants (`ResolveFailed` and `ParentResolveFailed`) additionally keep the
/// underlying [`std::io::Error`] in `err`. Values of this type are wrapped, boxed, in
/// [`IncludeResolveError::FileInclude`].
///
/// The enum is `#[non_exhaustive]`, so `match` expressions written outside this crate need a
/// wildcard arm.
#[derive(Debug)]
#[non_exhaustive]
pub enum ResolveProblem {
    /// Failed to canonicalize the include target path.
    ResolveFailed {
        spec: String,
        base_dir: String,
        err: std::io::Error,
    },
    /// The include target is not a regular file.
    TargetNotRegularFile { target: String },
    /// The include target resolves to the configured root file itself (cyclic include).
    TargetIsRootFile { spec: String },
    /// The parent include id was not an absolute canonical path.
    ParentIdNotAbsoluteCanonical { parent_id: String },
    /// Failed to resolve the parent include source.
    ParentResolveFailed {
        parent_id: String,
        from_name: String,
        err: std::io::Error,
    },
    /// The parent include is not a regular file.
    ParentNotRegularFile { parent: String },
    /// The parent include does not have a parent directory.
    ParentHasNoDirectory { parent: String },
    /// The include resolves outside the configured root directory.
    ResolvesOutsideRoot { spec: String, root: String },
    /// The include traverses a symlink, which is disabled by policy.
    TraversesSymlink { spec: String },
    /// Absolute include paths are not allowed.
    AbsolutePathNotAllowed { spec: String },
    /// The include path is empty.
    EmptyPath,
    /// The include target does not have a valid YAML extension (.yml or .yaml).
    InvalidExtension { spec: String },
    /// The include target is a hidden file (starts with a dot).
    HiddenFile { spec: String },
    /// The include fragment is empty.
    EmptyFragment,
    /// The include fragment contains a '#' character.
    FragmentContainsHash { spec: String },
}
112
/// Error type returned by user-provided include resolvers.
///
/// The enum is `#[non_exhaustive]`, so `match` expressions written outside this crate need a
/// wildcard arm.
#[derive(Debug)]
#[non_exhaustive]
pub enum IncludeResolveError {
    /// An underlying I/O failure.
    ///
    /// [`std::io::Error`] converts into this variant through `From`, so `?` can be applied to
    /// I/O results inside a resolver.
    Io(std::io::Error),
    /// A free-form, resolver-supplied error message.
    Message(String),
    /// A size limit was exceeded.
    ///
    /// NOTE(review): the meaning and order of the two `usize` values are not visible in this
    /// file (presumably a limit and an observed size) — confirm before relying on them.
    SizeLimitExceeded(usize, usize),
    /// A file-include resolution problem, described by the boxed [`ResolveProblem`].
    FileInclude(Box<ResolveProblem>),
}
122
123impl From<std::io::Error> for IncludeResolveError {
124 fn from(value: std::io::Error) -> Self {
125 Self::Io(value)
126 }
127}
128
/// A request passed to the include resolver to resolve an include directive.
///
/// The string fields `spec`, `from_name`, and `from_id` are borrowed for `'a`; `stack` is an
/// owned vector, so it remains usable independently of those borrows.
pub struct IncludeRequest<'a> {
    /// The include specification (e.g. the path or URL).
    pub spec: &'a str,
    /// The name of the file or source currently being parsed (top of the include stack).
    pub from_name: &'a str,
    /// The canonical identity of the source currently being parsed, or None for the root parser.
    pub from_id: Option<&'a str>,
    /// The full chain of inclusions leading to this request, with the current file at the end.
    ///
    /// NOTE(review): whether the entries are source ids or display names is not visible in
    /// this file — confirm against the code that builds the request.
    pub stack: Vec<String>,
    /// Remaining decoded byte quota available for additional reader-backed input, if configured.
    pub size_remaining: Option<usize>,
    /// The location in the source file where the include was requested.
    pub location: crate::Location,
}
144
/// Callback used to resolve `!include` directives during parsing.
///
/// The resolver receives an [`IncludeRequest`] describing what was requested, from which
/// source it originated, and where in the source file the directive was encountered. It must
/// either return a [`ResolvedInclude`] with a stable `id`, human-friendly `name`, and the
/// replacement [`InputSource`], or fail with [`IncludeResolveError`].
///
/// The `id` should uniquely identify the underlying resource after any normalization you need
/// (for example, a canonical filesystem path or a normalized URL). `serde-saphyr` uses this
/// identifier for include-stack tracking and cycle detection. The `name` is intended for error
/// messages and can be more user-friendly.
///
/// Resolvers may return:
/// - [`InputSource::Text`] for ordinary in-memory YAML,
/// - [`InputSource::AnchoredText`] when the include should behave as if a specific anchor was
///   the first parsed node, or
/// - [`InputSource::Reader`] when content should be streamed from an owned reader.
///
/// A resolver is invoked lazily, when a `!include` tag is encountered. Because the type is
/// `FnMut`, the callback may keep state such as caches, metrics, or a virtual file map.
///
/// The alias names an unsized `dyn FnMut` type, so it is used behind a pointer (for example
/// `&mut IncludeResolver<'_>` or `Box<IncludeResolver<'_>>`). The `'a` lifetime bounds whatever
/// the callback borrows from its environment.
///
/// ```rust
/// # #[cfg(feature = "include")]
/// # {
/// use serde::Deserialize;
/// use serde_saphyr::{
///     from_str_with_options, options, IncludeRequest, IncludeResolveError, InputSource,
///     ResolvedInclude,
/// };
///
/// #[derive(Debug, Deserialize, PartialEq)]
/// struct Config {
///     users: Vec<User>,
/// }
///
/// #[derive(Debug, Deserialize, PartialEq)]
/// struct User {
///     name: String,
/// }
///
/// let root_yaml = "users: !include virtual://users.yaml\n";
/// let users_yaml = "- name: Alice\n- name: Bob\n";
///
/// let options = options! {}.with_include_resolver(|req: IncludeRequest<'_>| {
///     assert_eq!(req.spec, "virtual://users.yaml");
///     assert_eq!(req.from_name, "<input>");
///
///     if req.spec == "virtual://users.yaml" {
///         Ok(ResolvedInclude {
///             id: req.spec.to_owned(),
///             name: "virtual users".to_owned(),
///             source: InputSource::from_string(users_yaml.to_owned()),
///         })
///     } else {
///         Err(IncludeResolveError::Message(format!("unknown include: {}", req.spec)))
///     }
/// });
///
/// let config: Config = from_str_with_options(root_yaml, options).unwrap();
/// assert_eq!(config.users.len(), 2);
/// assert_eq!(config.users[0].name, "Alice");
/// # }
/// ```
pub type IncludeResolver<'a> =
    dyn FnMut(IncludeRequest<'_>) -> Result<ResolvedInclude, IncludeResolveError> + 'a;
210
211impl InputSource {
212 #[inline]
213 pub fn from_string(s: String) -> Self {
214 Self::Text(s)
215 }
216
217 #[inline]
218 pub fn from_reader<R>(r: R) -> Self
219 where
220 R: Read + 'static,
221 {
222 Self::Reader(Box::new(r))
223 }
224}
225
#[cfg(test)]
mod tests {
    use super::InputSource;

    /// Pins the exact `Debug` rendering of every `InputSource` variant.
    #[test]
    fn debug_formats_each_input_source_variant() {
        // Plain text renders like a derived tuple variant.
        let rendered_text = format!("{:?}", InputSource::from_string("hello".to_owned()));
        assert_eq!(rendered_text, "Text(\"hello\")");

        // Anchored text lists the anchor before the escaped document body.
        let rendered_anchored = format!(
            "{:?}",
            InputSource::AnchoredText {
                text: "body: true\n".to_owned(),
                anchor: "defaults".to_owned(),
            }
        );
        assert_eq!(
            rendered_anchored,
            "AnchoredText { anchor: \"defaults\", text: \"body: true\\n\" }"
        );

        // A reader is opaque, so only a placeholder is printed.
        let stream = std::io::Cursor::new(b"stream".to_vec());
        let rendered_reader = format!("{:?}", InputSource::from_reader(stream));
        assert_eq!(rendered_reader, "Reader(..)");
    }
}
247}