serde_saphyr/de/input_source.rs
1use std::io::Read;
2
/// Owned input that can be fed into the YAML parser.
///
/// This is primarily used by the include resolver: it can return fully-owned in-memory
/// text, in-memory text narrowed to a single anchored node, or a fully-owned streaming
/// reader.
pub enum InputSource {
    /// Owned text.
    Text(String),
    /// Owned YAML text together with the name of an anchor to extract from it.
    ///
    /// This is mainly intended for resolvers that support include specs such as
    /// `path/to/file.yaml#anchor_name`. In that case, the resolver still receives the full
    /// include spec via [`IncludeRequest::spec`], splits the file part from the fragment itself,
    /// reads the target document, and returns:
    ///
    /// ```rust
    /// # use serde_saphyr::InputSource;
    /// let source = InputSource::AnchoredText {
    ///     text: "defaults: &defaults\n enabled: true\nfeature: *defaults\n".to_owned(),
    ///     anchor: "defaults".to_owned(),
    /// };
    /// ```
    ///
    /// During parsing, `serde-saphyr` will parse `text`, find the node anchored as `&defaults`,
    /// and replay only that anchored node as the included value. Conceptually, this makes:
    ///
    /// ```yaml
    /// settings: !include config.yaml#defaults
    /// ```
    ///
    /// behave as if `settings` directly contained the YAML node anchored as `&defaults` inside
    /// `config.yaml`.
    ///
    /// Use [`InputSource::Text`] when the whole document should be included, and use
    /// [`InputSource::AnchoredText`] only when you want the include to resolve to a specific
    /// anchored fragment.
    AnchoredText {
        /// Full YAML text of the document that contains the anchor.
        text: String,
        /// Name of the anchor to extract, written without the leading `&`.
        anchor: String,
    },
    /// Owned reader (streaming).
    Reader(Box<dyn Read + 'static>),
}
42
43impl std::fmt::Debug for InputSource {
44 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
45 match self {
46 Self::Text(text) => f.debug_tuple("Text").field(text).finish(),
47 Self::AnchoredText { text, anchor } => f
48 .debug_struct("AnchoredText")
49 .field("anchor", anchor)
50 .field("text", text)
51 .finish(),
52 Self::Reader(_) => f.write_str("Reader(..)"),
53 }
54 }
55}
56
57/// A resolved include containing the source identity and the content.
58#[derive(Debug)]
59pub struct ResolvedInclude {
60 /// The canonical identity of the included source, used for cycle detection and absolute paths.
61 pub id: String,
62 /// The display name of the included source, used for error messages.
63 pub name: String,
64 /// The actual content to parse.
65 pub source: InputSource,
66}
67
/// Specific problems encountered during file include resolution.
///
/// Each variant carries the strings (and, where applicable, the underlying I/O error)
/// needed to report the problem. The enum is `#[non_exhaustive]`, so matches outside this
/// crate need a wildcard arm.
#[derive(Debug)]
#[non_exhaustive]
pub enum ResolveProblem {
    /// Failed to canonicalize the include target path.
    ResolveFailed {
        /// The include spec that was being resolved.
        spec: String,
        /// The base directory reported alongside the failure.
        base_dir: String,
        /// The underlying I/O error.
        err: std::io::Error,
    },
    /// The include target is not a regular file.
    TargetNotRegularFile {
        /// The offending include target.
        target: String,
    },
    /// The include target resolves to the configured root file itself (cyclic include).
    TargetIsRootFile {
        /// The include spec that resolved to the root file.
        spec: String,
    },
    /// The parent include id was not an absolute canonical path.
    ParentIdNotAbsoluteCanonical {
        /// The offending parent include id.
        parent_id: String,
    },
    /// Failed to resolve the parent include source.
    ParentResolveFailed {
        /// Id of the parent include that could not be resolved.
        parent_id: String,
        /// Name of the source the include was requested from.
        from_name: String,
        /// The underlying I/O error.
        err: std::io::Error,
    },
    /// The parent include is not a regular file.
    ParentNotRegularFile {
        /// The offending parent include.
        parent: String,
    },
    /// The parent include does not have a parent directory.
    ParentHasNoDirectory {
        /// The offending parent include.
        parent: String,
    },
    /// The include resolves outside the configured root directory.
    ResolvesOutsideRoot {
        /// The offending include spec.
        spec: String,
        /// The configured root directory.
        root: String,
    },
    /// The include traverses a symlink, which is disabled by policy.
    TraversesSymlink {
        /// The offending include spec.
        spec: String,
    },
    /// Absolute include paths are not allowed.
    AbsolutePathNotAllowed {
        /// The offending include spec.
        spec: String,
    },
    /// The include path is empty.
    EmptyPath,
    /// The include target does not have a valid YAML extension (.yml or .yaml).
    InvalidExtension {
        /// The offending include spec.
        spec: String,
    },
    /// The include target is a hidden file (starts with a dot).
    HiddenFile {
        /// The offending include spec.
        spec: String,
    },
    /// The include fragment is empty.
    EmptyFragment,
    /// The include fragment contains a '#' character.
    FragmentContainsHash {
        /// The offending include spec.
        spec: String,
    },
}
111
112/// Error type returned by user-provided include resolvers.
113#[derive(Debug)]
114#[non_exhaustive]
115pub enum IncludeResolveError {
116 Io(std::io::Error),
117 Message(String),
118 SizeLimitExceeded(usize, usize),
119 FileInclude(Box<ResolveProblem>),
120}
121
122impl From<std::io::Error> for IncludeResolveError {
123 fn from(value: std::io::Error) -> Self {
124 Self::Io(value)
125 }
126}
127
128/// A request passed to the include resolver to resolve an include directive.
129pub struct IncludeRequest<'a> {
130 /// The include specification (e.g. the path or URL).
131 pub spec: &'a str,
132 /// The name of the file or source currently being parsed (top of the include stack).
133 pub from_name: &'a str,
134 /// The canonical identity of the source currently being parsed, or None for the root parser.
135 pub from_id: Option<&'a str>,
136 /// The full chain of inclusions leading to this request, with the current file at the end.
137 pub stack: Vec<String>,
138 /// Remaining decoded byte quota available for additional reader-backed input, if configured.
139 pub size_remaining: Option<usize>,
140 /// The location in the source file where the include was requested.
141 pub location: crate::Location,
142}
143
144/// Callback used to resolve `!include` directives during parsing.
145///
146/// The resolver receives an [`IncludeRequest`] describing what was requested, from which
147/// source it originated, and where in the source file the directive was encountered. It must
148/// either return a [`ResolvedInclude`] with a stable `id`, human-friendly `name`, and the
149/// replacement [`InputSource`], or fail with [`IncludeResolveError`].
150///
151/// The `id` should uniquely identify the underlying resource after any normalization you need
152/// (for example, a canonical filesystem path or a normalized URL). `serde-saphyr` uses this
153/// identifier for include-stack tracking and cycle detection. The `name` is intended for error
154/// messages and can be more user-friendly.
155///
156/// Resolvers may return:
157/// - [`InputSource::Text`] for ordinary in-memory YAML,
158/// - [`InputSource::AnchoredText`] when the include should behave as if a specific anchor was
159/// the first parsed node, or
160/// - [`InputSource::Reader`] when content should be streamed from an owned reader.
161///
162/// A resolver is invoked lazily, when a `!include` tag is encountered. Because the type is
163/// `FnMut`, the callback may keep state such as caches, metrics, or a virtual file map.
164///
165/// ```rust
166/// # #[cfg(feature = "include")]
167/// # {
168/// use serde::Deserialize;
169/// use serde_saphyr::{
170/// from_str_with_options, options, IncludeRequest, IncludeResolveError, InputSource,
171/// ResolvedInclude,
172/// };
173///
174/// #[derive(Debug, Deserialize, PartialEq)]
175/// struct Config {
176/// users: Vec<User>,
177/// }
178///
179/// #[derive(Debug, Deserialize, PartialEq)]
180/// struct User {
181/// name: String,
182/// }
183///
184/// let root_yaml = "users: !include virtual://users.yaml\n";
185/// let users_yaml = "- name: Alice\n- name: Bob\n";
186///
187/// let options = options! {}.with_include_resolver(|req: IncludeRequest<'_>| {
188/// assert_eq!(req.spec, "virtual://users.yaml");
189/// assert_eq!(req.from_name, "<input>");
190///
191/// if req.spec == "virtual://users.yaml" {
192/// Ok(ResolvedInclude {
193/// id: req.spec.to_owned(),
194/// name: "virtual users".to_owned(),
195/// source: InputSource::from_string(users_yaml.to_owned()),
196/// })
197/// } else {
198/// Err(IncludeResolveError::Message(format!("unknown include: {}", req.spec)))
199/// }
200/// });
201///
202/// let config: Config = from_str_with_options(root_yaml, options).unwrap();
203/// assert_eq!(config.users.len(), 2);
204/// assert_eq!(config.users[0].name, "Alice");
205/// # }
206/// ```
207pub type IncludeResolver<'a> =
208 dyn FnMut(IncludeRequest<'_>) -> Result<ResolvedInclude, IncludeResolveError> + 'a;
209
210impl InputSource {
211 #[inline]
212 pub fn from_string(s: String) -> Self {
213 Self::Text(s)
214 }
215
216 #[inline]
217 pub fn from_reader<R>(r: R) -> Self
218 where
219 R: Read + 'static,
220 {
221 Self::Reader(Box::new(r))
222 }
223}