serde_saphyr/de/options.rs
1use crate::budget::Budget;
2use crate::indentation::RequireIndent;
3use serde::{Deserialize, Serialize};
4#[cfg(feature = "properties")]
5use std::collections::HashMap;
6#[cfg(feature = "include_fs")]
7use std::io;
8#[cfg(feature = "include_fs")]
9use std::path::Path;
10use std::rc::Rc;
11
12// Intentionally no `saphyr_parser` imports here: include resolvers are handled in serde-saphyr.
13
14/// Duplicate key handling policy for mappings.
15#[non_exhaustive]
16#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
17pub enum DuplicateKeyPolicy {
18 /// Error out on encountering a duplicate key.
19 Error,
20 /// First key wins: later duplicate pairs are skipped (key+value are consumed and ignored).
21 FirstWins,
22 /// Last key wins: later duplicate pairs are passed through (default Serde targets typically overwrite).
23 LastWins,
24}
25
26/// Limits applied to alias replay to harden against alias bombs.
27#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
28pub struct AliasLimits {
29 /// Maximum total number of **replayed** events injected from aliases across the entire parse.
30 /// When exceeded, deserialization errors (alias replay limit exceeded).
31 pub max_total_replayed_events: usize,
32 /// Maximum depth of the alias replay stack (nested alias → injected buffer → alias, etc.).
33 pub max_replay_stack_depth: usize,
34 /// Maximum number of times a **single anchor id** may be expanded via alias.
35 /// Use `usize::MAX` for "unlimited".
36 pub max_alias_expansions_per_anchor: usize,
37}
38
39impl Default for AliasLimits {
40 fn default() -> Self {
41 Self {
42 max_total_replayed_events: 1_000_000,
43 max_replay_stack_depth: 64,
44 max_alias_expansions_per_anchor: usize::MAX,
45 }
46 }
47}
48
49/// Parser configuration options.
50///
51/// Use this to configure duplicate-key policy, alias-replay limits, and an
52/// optional pre-parse YAML [`Budget`].
53///
54/// Example: parse a small `Config` using custom `Options`.
55///
56/// ```rust
57/// use serde::Deserialize;
58///
59/// use serde_saphyr::options::DuplicateKeyPolicy;
60/// use serde_saphyr::{from_str_with_options, Budget, Options};
61///
62/// #[derive(Deserialize)]
63/// struct Config {
64/// name: String,
65/// enabled: bool,
66/// retries: i32,
67/// }
68///
69/// let yaml = r#"
70/// name: My Application
71/// enabled: true
72/// retries: 5
73/// "#;
74///
75/// let options = serde_saphyr::options! {
76/// budget: serde_saphyr::budget! {
77/// max_documents: 2,
78/// },
79/// duplicate_keys: DuplicateKeyPolicy::LastWins,
80/// };
81///
82/// let cfg: Config = from_str_with_options(yaml, options).unwrap();
83/// assert_eq!(cfg.name, "My Application");
84/// ```
85#[derive(Clone, Serialize, Deserialize)]
86pub struct Options {
87 /// Optional YAML budget to enforce before parsing (counts raw parser events).
88 pub budget: Option<Budget>,
89 /// Optional callback invoked with the final budget report after parsing.
90 /// It is invoked both when parsing is successful and when budget was breached.
91 #[serde(skip)]
92 pub budget_report: Option<fn(&crate::budget::BudgetReport)>,
93
94 /// Invoked both when parsing is successful and when budget was breached.
95 #[serde(skip)]
96 pub budget_report_cb: Option<BudgetReportCallback>,
97
98 /// Policy for duplicate keys.
99 pub duplicate_keys: DuplicateKeyPolicy,
100 /// Limits for alias replay to harden against alias bombs.
101 pub alias_limits: AliasLimits,
102 /// Enable legacy octal parsing where values starting with `00` are treated as base-8.
103 /// They are deprecated in YAML 1.2. Default: false.
104 pub legacy_octal_numbers: bool,
105 /// If true, interpret only the exact literals `true` and `false` as booleans.
106 /// YAML 1.1 forms like `yes`/`no`/`on`/`off` will be rejected and not inferred.
107 /// Default: false (accept YAML 1.1 boolean forms).
108 pub strict_booleans: bool,
109 /// When a field marked with the `!!binary` tag is deserialized into a `String`,
110 /// `serde-saphyr` normally expects the value to be base64-encoded UTF-8.
111 /// If you want to treat the value as a plain string and ignore the `!!binary` tag,
112 /// set this to `true` (the default is `false`).
113 pub ignore_binary_tag_for_string: bool,
114 /// Activates YAML conventions common in robotics community. These extensions support
115 /// conversion functions (deg, rad) and simple mathematical expressions such as deg(180),
116 /// rad(pi), 1 + 2*(3 - 4/5), or rad(pi/2). [robotics] feature must also be enabled.
117 pub angle_conversions: bool,
118 /// If true, values that can be parsed as booleans or numbers are rejected as
119 /// unquoted strings. This flag is intended for teams that want to enforce
120 /// compatibility with YAML parsers that infer types from unquoted values,
121 /// requiring such strings to be explicitly quoted.
122 /// The default is false (a number or boolean will be stored in the string
123 /// field exactly as provided, without quoting).
124 pub no_schema: bool,
125
126 /// If true (default), public APIs that have access to the original YAML input
127 /// will wrap returned errors with a snippet wrapper, enabling rustc-like snippet
128 /// rendering when a location is available.
129 pub with_snippet: bool,
130
131 /// Horizontal crop radius (in character columns) when rendering snippet diagnostics.
132 ///
133 /// The renderer crops all displayed lines (including the context lines) to the same
134 /// column window around the reported error column, so they stay vertically aligned.
135 ///
136 /// If set to `0`, snippet wrapping is disabled (the original, unwrapped error is returned).
137 pub crop_radius: usize,
138
139 /// Indentation requirement for the parsed document.
140 pub require_indent: RequireIndent,
141
142 /// Optional include resolver callback.
143 ///
144 /// When provided, it can push parsers onto the internal parser stack to resolve `!include`
145 ///-like constructs.
146 #[cfg(feature = "include")]
147 #[serde(skip)]
148 pub include_resolver: Option<IncludeResolverCallback>,
149
150 /// A map of properties to substitute in scalar values.
151 /// Used for docker-compose-style interpolation like `${VAR}`.
152 #[cfg(feature = "properties")]
153 #[serde(skip)]
154 pub property_map: Option<Rc<HashMap<String, String>>>,
155}
156
/// Shared, interior-mutable handle to the closure that resolves `!include` requests.
///
/// The closure receives a [`crate::input_source::IncludeRequest`] and returns either a
/// [`crate::ResolvedInclude`] or a [`crate::IncludeResolveError`].
/// [`Options::with_include_resolver`] wraps a user closure into this type.
#[cfg(feature = "include")]
pub type IncludeResolverCallback = Rc<
    std::cell::RefCell<
        dyn for<'req> FnMut(
                crate::input_source::IncludeRequest<'req>,
            ) -> Result<crate::ResolvedInclude, crate::IncludeResolveError>
            + 'static,
    >,
>;
167
168pub type BudgetReportCallback =
169 Rc<std::cell::RefCell<dyn FnMut(crate::budget::BudgetReport) + 'static>>;
170
171impl Options {
172 /// Registers a budget-report callback. Any closure can be used, including ones that
173 /// capture state from the surrounding scope.
174 ///
175 /// The callback is invoked with the final [`crate::budget::BudgetReport`] after parsing
176 /// completes, both on success and when the budget is breached.
177 ///
178 /// ```rust
179 /// use serde_saphyr::Options;
180 /// use serde_saphyr::budget::BudgetReport;
181 ///
182 /// let options = Options::default().with_budget_report(|report: BudgetReport| {
183 /// // e.g. update your state / emit metrics / log the report
184 /// let _ = report;
185 /// });
186 /// ```
187 #[allow(deprecated)]
188 pub fn with_budget_report<F>(mut self, cb: F) -> Self
189 where
190 F: FnMut(crate::budget::BudgetReport) + 'static,
191 {
192 self.budget_report_cb = Some(Rc::new(std::cell::RefCell::new(cb)));
193 self
194 }
195
196 /// Installs a property map used for `${NAME}` interpolation in plain scalars.
197 ///
198 /// This is the intended public API for the `properties` feature. It consumes the provided
199 /// [`HashMap`] and stores it in the internal shared representation used by nested
200 /// deserializers, so callers do not have to construct `Rc` or `Some(...)` manually.
201 ///
202 /// ```rust
203 /// # #[cfg(feature = "properties")]
204 /// # {
205 /// use std::collections::HashMap;
206 /// use serde_saphyr::Options;
207 ///
208 /// let mut properties = HashMap::new();
209 /// properties.insert("MODE".to_string(), "production".to_string());
210 ///
211 /// let options = Options::default().with_properties(properties);
212 /// # let _ = options;
213 /// # }
214 /// ```
215 #[cfg(feature = "properties")]
216 pub fn with_properties(mut self, properties: HashMap<String, String>) -> Self {
217 self.property_map = Some(Rc::new(properties));
218 self
219 }
220
221 /// Sets the include resolver callback to be used during parsing.
222 ///
223 /// This method is for advances use cases. If you just want to include files from the
224 /// filesystem, use [`Options::with_filesystem_root`] instead that will use [`crate::SafeFileResolver`]
225 ///
226 /// The callback is invoked each time the parser encounters a `!include` tag. It receives an
227 /// [`crate::input_source::IncludeRequest`] describing the requested include target, the source
228 /// that triggered it, the include stack, and the source location. The callback must then
229 /// return either a [`crate::ResolvedInclude`] or a [`crate::IncludeResolveError`].
230 ///
231 /// This is useful for virtual filesystems, embedded configuration bundles, network-backed
232 /// resolvers, or custom caching layers.
233 ///
234 /// ```rust
235 /// # #[cfg(feature = "include")]
236 /// # {
237 /// use serde::Deserialize;
238 /// use serde_saphyr::{
239 /// from_str_with_options, options, IncludeRequest, IncludeResolveError, InputSource,
240 /// ResolvedInclude,
241 /// };
242 ///
243 /// #[derive(Debug, Deserialize, PartialEq)]
244 /// struct Config {
245 /// users: Vec<User>,
246 /// }
247 ///
248 /// #[derive(Debug, Deserialize, PartialEq)]
249 /// struct User {
250 /// name: String,
251 /// }
252 ///
253 /// let root_yaml = "users: !include virtual://users.yaml\n";
254 /// let users_yaml = "- name: Alice\n- name: Bob\n";
255 ///
256 /// let options = options! {}.with_include_resolver(|req: IncludeRequest<'_>| {
257 /// assert_eq!(req.spec, "virtual://users.yaml");
258 /// assert_eq!(req.from_name, "<input>");
259 ///
260 /// if req.spec == "virtual://users.yaml" {
261 /// Ok(ResolvedInclude {
262 /// id: req.spec.to_owned(),
263 /// name: "virtual users".to_owned(),
264 /// source: InputSource::from_string(users_yaml.to_owned()),
265 /// })
266 /// } else {
267 /// Err(IncludeResolveError::Message(format!("unknown include: {}", req.spec)))
268 /// }
269 /// });
270 ///
271 /// let config: Config = from_str_with_options(root_yaml, options).unwrap();
272 /// assert_eq!(config.users.len(), 2);
273 /// assert_eq!(config.users[0].name, "Alice");
274 /// # }
275 /// ```
276 #[cfg(feature = "include")]
277 pub fn with_include_resolver<F>(mut self, cb: F) -> Self
278 where
279 F: for<'res> FnMut(
280 crate::input_source::IncludeRequest<'res>,
281 )
282 -> Result<crate::ResolvedInclude, crate::IncludeResolveError>
283 + 'static,
284 {
285 self.include_resolver = Some(Rc::new(std::cell::RefCell::new(cb)));
286 self
287 }
288
289 /// Configures a [`crate::SafeFileResolver`] rooted at `path` for `!include` lookups.
290 ///
291 /// This is a convenience for:
292 ///
293 /// ```rust,no_run
294 /// # #[cfg(feature = "include_fs")]
295 /// # fn main() -> Result<(), std::io::Error> {
296 /// # use serde_saphyr::{options, SafeFileResolver};
297 /// let options = options! {}
298 /// .with_include_resolver(SafeFileResolver::new("./configs")?.into_callback());
299 /// # let _ = options;
300 /// # Ok(())
301 /// # }
302 /// ```
303 ///
304 /// It enables filesystem-backed `!include` resolution while keeping every included file
305 /// confined to the canonicalized `path` root.
306 ///
307 /// ```rust,no_run
308 /// # #[cfg(feature = "include_fs")]
309 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
310 /// use serde::Deserialize;
311 /// use serde_saphyr::{from_str_with_options, options};
312 ///
313 /// #[derive(Debug, Deserialize)]
314 /// struct User {
315 /// name: String,
316 /// }
317 ///
318 /// #[derive(Debug, Deserialize)]
319 /// struct Config {
320 /// users: Vec<User>,
321 /// }
322 ///
323 /// let yaml = "users: !include#users value.yaml\n";
324 /// let options = options! {}.with_filesystem_root("./examples")?;
325 /// let config: Config = from_str_with_options(yaml, options)?;
326 /// # let _ = config;
327 /// # Ok(())
328 /// # }
329 /// ```
330 #[cfg(feature = "include_fs")]
331 pub fn with_filesystem_root<P>(self, path: P) -> io::Result<Self>
332 where
333 P: AsRef<Path>,
334 {
335 Ok(self.with_include_resolver(crate::SafeFileResolver::new(path)?.into_callback()))
336 }
337}
338
339impl Default for Options {
340 #[allow(deprecated)]
341 fn default() -> Self {
342 Self {
343 budget: Some(Budget::default()),
344 budget_report: None,
345 budget_report_cb: None,
346 duplicate_keys: DuplicateKeyPolicy::Error,
347 alias_limits: AliasLimits::default(),
348 legacy_octal_numbers: false,
349 strict_booleans: false,
350 angle_conversions: false,
351 ignore_binary_tag_for_string: false,
352 no_schema: false,
353 with_snippet: true,
354 crop_radius: 64,
355 require_indent: RequireIndent::Unchecked,
356
357 #[cfg(feature = "include")]
358 include_resolver: None,
359 #[cfg(feature = "properties")]
360 property_map: None,
361 }
362 }
363}
364
365impl std::fmt::Debug for Options {
366 #[allow(deprecated)]
367 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
368 f.debug_struct("Options")
369 .field("budget", &self.budget)
370 .field("budget_report", &self.budget_report)
371 .field(
372 "budget_report_cb",
373 &if self.budget_report_cb.is_some() {
374 "set"
375 } else {
376 "none"
377 },
378 )
379 .field("duplicate_keys", &self.duplicate_keys)
380 .field("alias_limits", &self.alias_limits)
381 .field("legacy_octal_numbers", &self.legacy_octal_numbers)
382 .field("strict_booleans", &self.strict_booleans)
383 .field(
384 "ignore_binary_tag_for_string",
385 &self.ignore_binary_tag_for_string,
386 )
387 .field("angle_conversions", &self.angle_conversions)
388 .field("no_schema", &self.no_schema)
389 .field("with_snippet", &self.with_snippet)
390 .field("crop_radius", &self.crop_radius)
391 .field("require_indent", &self.require_indent)
392 .field("include_resolver", &{
393 #[cfg(feature = "include")]
394 {
395 if self.include_resolver.is_some() {
396 "set"
397 } else {
398 "none"
399 }
400 }
401 #[cfg(not(feature = "include"))]
402 {
403 "disabled"
404 }
405 })
406 .field("property_map", &{
407 #[cfg(feature = "properties")]
408 {
409 if self.property_map.is_some() {
410 "set"
411 } else {
412 "none"
413 }
414 }
415 #[cfg(not(feature = "properties"))]
416 {
417 "disabled"
418 }
419 })
420 .finish()
421 }
422}
423
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg(feature = "include_fs")]
    use crate::input_source::{IncludeRequest, InputSource};
    #[cfg(feature = "include_fs")]
    use std::path::PathBuf;
    #[cfg(feature = "include_fs")]
    use tempfile::tempdir;

    /// `Options::default()` must expose the documented default for every field.
    #[test]
    fn test_options_default() {
        let opts = Options::default();
        assert!(opts.budget.is_some());
        assert!(opts.budget_report.is_none());
        assert!(opts.budget_report_cb.is_none());
        assert!(matches!(opts.duplicate_keys, DuplicateKeyPolicy::Error));
        assert_eq!(opts.alias_limits.max_total_replayed_events, 1_000_000);
        assert!(!opts.legacy_octal_numbers);
        assert!(!opts.strict_booleans);
        assert!(!opts.ignore_binary_tag_for_string);
        assert!(!opts.angle_conversions);
        assert!(!opts.no_schema);
        assert!(opts.with_snippet);
        assert_eq!(opts.crop_radius, 64);
        assert_eq!(opts.require_indent, RequireIndent::Unchecked);

        #[cfg(feature = "include")]
        assert!(opts.include_resolver.is_none());
        #[cfg(feature = "properties")]
        {
            assert!(opts.property_map.is_none());
        }
    }

    /// The manual `Debug` impl summarises callbacks and feature-gated fields as
    /// `"set"` / `"none"` / `"disabled"`.
    #[test]
    fn test_options_debug_format() {
        let opts = Options::default();
        let debug_str = format!("{:?}", opts);
        assert!(debug_str.contains("Options"));
        assert!(debug_str.contains("budget"));
        assert!(debug_str.contains("budget_report_cb: \"none\""));

        #[cfg(feature = "include")]
        assert!(debug_str.contains("include_resolver: \"none\""));
        // Mirror the `property_map` checks below: the compiled-out case is covered too.
        #[cfg(not(feature = "include"))]
        assert!(debug_str.contains("include_resolver: \"disabled\""));
        #[cfg(feature = "properties")]
        {
            assert!(debug_str.contains("property_map: \"none\""));
        }
        #[cfg(not(feature = "properties"))]
        {
            assert!(debug_str.contains("property_map: \"disabled\""));
        }

        // Test with callback
        let opts_with_cb = opts.with_budget_report(|_| {});
        let debug_str_cb = format!("{:?}", opts_with_cb);
        assert!(debug_str_cb.contains("budget_report_cb: \"set\""));
    }

    /// `with_properties` stores the supplied map so its entries can be read back.
    #[cfg(feature = "properties")]
    #[test]
    fn test_with_properties_sets_property_map() {
        let mut properties = std::collections::HashMap::new();
        properties.insert("MODE".to_string(), "production".to_string());

        let opts = Options::default().with_properties(properties);

        assert_eq!(
            opts.property_map.as_deref().unwrap().get("MODE"),
            Some(&"production".to_string())
        );
    }

    /// `AliasLimits::default()` must keep its three documented values.
    #[test]
    fn test_alias_limits_default() {
        let limits = AliasLimits::default();
        assert_eq!(limits.max_total_replayed_events, 1_000_000);
        assert_eq!(limits.max_replay_stack_depth, 64);
        assert_eq!(limits.max_alias_expansions_per_anchor, usize::MAX);
    }

    /// `with_filesystem_root` installs an include resolver.
    #[cfg(feature = "include_fs")]
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_with_filesystem_root_sets_include_resolver() {
        let root = PathBuf::from(".");
        let opts = Options::default().with_filesystem_root(&root).unwrap();
        assert!(opts.include_resolver.is_some());
    }

    /// The resolver installed by `with_filesystem_root` returns a reader-backed source
    /// for a regular file inside the root.
    #[cfg(feature = "include_fs")]
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_with_filesystem_root_uses_reader_default_for_regular_files() {
        let dir = tempdir().unwrap();
        std::fs::write(dir.path().join("child.yaml"), "value: 1\n").unwrap();

        let opts = Options::default().with_filesystem_root(dir.path()).unwrap();
        let mut resolver = opts
            .include_resolver
            .as_ref()
            .expect("resolver set")
            .borrow_mut();
        let resolved = resolver(IncludeRequest {
            spec: "child.yaml",
            from_name: "<input>",
            from_id: None,
            stack: vec!["<input>".to_string()],
            location: crate::Location::UNKNOWN,
            size_remaining: None,
        })
        .unwrap();

        assert!(matches!(resolved.source, InputSource::Reader(_)));
    }
}