Skip to main content

serde_saphyr/de/
options.rs

1use crate::budget::Budget;
2use crate::indentation::RequireIndent;
3use serde::{Deserialize, Serialize};
4#[cfg(feature = "properties")]
5use std::collections::HashMap;
6#[cfg(feature = "include_fs")]
7use std::io;
8#[cfg(feature = "include_fs")]
9use std::path::Path;
10use std::rc::Rc;
11
12// Intentionally no `saphyr_parser` imports here: include resolvers are handled in serde-saphyr.
13
/// Duplicate key handling policy for mappings.
///
/// The policy is selected through [`Options::duplicate_keys`]; `Options::default()`
/// uses [`DuplicateKeyPolicy::Error`].
///
/// The enum is `#[non_exhaustive]`, so code outside this crate that matches on it
/// must include a wildcard arm.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub enum DuplicateKeyPolicy {
    /// Error out on encountering a duplicate key.
    Error,
    /// First key wins: later duplicate pairs are skipped (key+value are consumed and ignored).
    FirstWins,
    /// Last key wins: later duplicate pairs are passed through (default Serde targets typically overwrite).
    LastWins,
}
25
/// Limits applied to alias replay to harden against alias bombs.
///
/// The [`Default`] implementation allows 1,000,000 replayed events in total, a replay
/// stack depth of 64, and an unlimited (`usize::MAX`) number of expansions per anchor.
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub struct AliasLimits {
    /// Maximum total number of **replayed** events injected from aliases across the entire parse.
    /// When exceeded, deserialization errors (alias replay limit exceeded).
    /// Default: `1_000_000`.
    pub max_total_replayed_events: usize,
    /// Maximum depth of the alias replay stack (nested alias → injected buffer → alias, etc.).
    /// Default: `64`.
    pub max_replay_stack_depth: usize,
    /// Maximum number of times a **single anchor id** may be expanded via alias.
    /// Use `usize::MAX` for "unlimited" (this is also the default).
    pub max_alias_expansions_per_anchor: usize,
}
38
39impl Default for AliasLimits {
40    fn default() -> Self {
41        Self {
42            max_total_replayed_events: 1_000_000,
43            max_replay_stack_depth: 64,
44            max_alias_expansions_per_anchor: usize::MAX,
45        }
46    }
47}
48
/// Parser configuration options.
///
/// Use this to configure duplicate-key policy, alias-replay limits, and an
/// optional pre-parse YAML [`Budget`].
///
/// Fields marked `#[serde(skip)]` (the callbacks and, when their features are enabled,
/// the include resolver and the property map) are left out when `Options` itself is
/// serialized or deserialized.
///
/// Example: parse a small `Config` using custom `Options`.
///
/// ```rust
/// use serde::Deserialize;
///
/// use serde_saphyr::options::DuplicateKeyPolicy;
/// use serde_saphyr::{from_str_with_options, Budget, Options};
///
/// #[derive(Deserialize)]
/// struct Config {
///     name: String,
///     enabled: bool,
///     retries: i32,
/// }
///
/// let yaml = r#"
/// name: My Application
/// enabled: true
/// retries: 5
/// "#;
///
/// let options = serde_saphyr::options! {
///     budget: serde_saphyr::budget! {
///         max_documents: 2,
///     },
///     duplicate_keys: DuplicateKeyPolicy::LastWins,
/// };
///
/// let cfg: Config = from_str_with_options(yaml, options).unwrap();
/// assert_eq!(cfg.name, "My Application");
/// ```
#[derive(Clone, Serialize, Deserialize)]
pub struct Options {
    /// Optional YAML budget to enforce before parsing (counts raw parser events).
    /// Default: `Some(Budget::default())`.
    pub budget: Option<Budget>,
    /// Optional callback invoked with the final budget report after parsing.
    /// It is invoked both when parsing is successful and when budget was breached.
    ///
    /// This variant takes a plain function pointer; to use a capturing closure,
    /// see [`Options::with_budget_report`]. Default: `None`.
    #[serde(skip)]
    pub budget_report: Option<fn(&crate::budget::BudgetReport)>,

    /// Optional closure-based callback that receives the final budget report after parsing.
    /// It is normally set through [`Options::with_budget_report`]. Default: `None`.
    ///
    /// Invoked both when parsing is successful and when budget was breached.
    #[serde(skip)]
    pub budget_report_cb: Option<BudgetReportCallback>,

    /// Policy for duplicate keys. Default: [`DuplicateKeyPolicy::Error`].
    pub duplicate_keys: DuplicateKeyPolicy,
    /// Limits for alias replay to harden against alias bombs.
    /// Default: `AliasLimits::default()`.
    pub alias_limits: AliasLimits,
    /// Enable legacy octal parsing where values starting with `00` are treated as base-8.
    /// They are deprecated in YAML 1.2. Default: false.
    pub legacy_octal_numbers: bool,
    /// If true, interpret only the exact literals `true` and `false` as booleans.
    /// YAML 1.1 forms like `yes`/`no`/`on`/`off` will be rejected and not inferred.
    /// Default: false (accept YAML 1.1 boolean forms).
    pub strict_booleans: bool,
    /// When a field marked with the `!!binary` tag is deserialized into a `String`,
    /// `serde-saphyr` normally expects the value to be base64-encoded UTF-8.
    /// If you want to treat the value as a plain string and ignore the `!!binary` tag,
    /// set this to `true` (the default is `false`).
    pub ignore_binary_tag_for_string: bool,
    /// Activates YAML conventions common in robotics community. These extensions support
    /// conversion functions (deg, rad) and simple mathematical expressions such as deg(180),
    /// rad(pi), 1 + 2*(3 - 4/5), or rad(pi/2). [robotics] feature must also be enabled.
    /// Default: false.
    pub angle_conversions: bool,
    /// If true, values that can be parsed as booleans or numbers are rejected as
    /// unquoted strings. This flag is intended for teams that want to enforce
    /// compatibility with YAML parsers that infer types from unquoted values,
    /// requiring such strings to be explicitly quoted.
    /// The default is false (a number or boolean will be stored in the string
    /// field exactly as provided, without quoting).
    pub no_schema: bool,

    /// If true (default), public APIs that have access to the original YAML input
    /// will wrap returned errors with a snippet wrapper, enabling rustc-like snippet
    /// rendering when a location is available.
    pub with_snippet: bool,

    /// Horizontal crop radius (in character columns) when rendering snippet diagnostics.
    ///
    /// The renderer crops all displayed lines (including the context lines) to the same
    /// column window around the reported error column, so they stay vertically aligned.
    ///
    /// If set to `0`, snippet wrapping is disabled (the original, unwrapped error is returned).
    /// Default: `64`.
    pub crop_radius: usize,

    /// Indentation requirement for the parsed document.
    /// Default: [`RequireIndent::Unchecked`].
    pub require_indent: RequireIndent,

    /// Optional include resolver callback.
    ///
    /// When provided, it can push parsers onto the internal parser stack to resolve
    /// `!include`-like constructs. It is normally set through
    /// [`Options::with_include_resolver`]. Default: `None`.
    #[cfg(feature = "include")]
    #[serde(skip)]
    pub include_resolver: Option<IncludeResolverCallback>,

    /// A map of properties to substitute in scalar values.
    /// Used for docker-compose-style interpolation like `${VAR}`.
    /// It is normally set through [`Options::with_properties`]. Default: `None`.
    #[cfg(feature = "properties")]
    #[serde(skip)]
    pub property_map: Option<Rc<HashMap<String, String>>>,
}
156
/// Shared include-resolver closure as stored in [`Options::include_resolver`].
///
/// The closure receives a [`crate::input_source::IncludeRequest`] and returns either a
/// [`crate::ResolvedInclude`] or a [`crate::IncludeResolveError`]. It is wrapped in
/// `Rc<RefCell<..>>`, so cloning an [`Options`] value shares the same closure instead of
/// copying it. [`Options::with_include_resolver`] builds this wrapper from a plain closure.
#[cfg(feature = "include")]
pub type IncludeResolverCallback = Rc<
    std::cell::RefCell<
        dyn for<'res> FnMut(
                crate::input_source::IncludeRequest<'res>,
            )
                -> Result<crate::ResolvedInclude, crate::IncludeResolveError>
            + 'static,
    >,
>;
167
/// Shared budget-report closure as stored in [`Options::budget_report_cb`].
///
/// The closure receives a [`crate::budget::BudgetReport`] by value. It is wrapped in
/// `Rc<RefCell<..>>`, so cloning an [`Options`] value shares the same closure instead of
/// copying it. [`Options::with_budget_report`] builds this wrapper from a plain closure.
pub type BudgetReportCallback =
    Rc<std::cell::RefCell<dyn FnMut(crate::budget::BudgetReport) + 'static>>;
170
171impl Options {
172    /// Registers a budget-report callback. Any closure can be used,  including ones that
173    /// capture state from the surrounding scope.
174    ///
175    /// The callback is invoked with the final [`crate::budget::BudgetReport`] after parsing
176    /// completes, both on success and when the budget is breached.
177    ///
178    /// ```rust
179    /// use serde_saphyr::Options;
180    /// use serde_saphyr::budget::BudgetReport;
181    ///
182    /// let options = Options::default().with_budget_report(|report: BudgetReport| {
183    ///     // e.g. update your state / emit metrics / log the report
184    ///     let _ = report;
185    /// });
186    /// ```
187    #[allow(deprecated)]
188    pub fn with_budget_report<F>(mut self, cb: F) -> Self
189    where
190        F: FnMut(crate::budget::BudgetReport) + 'static,
191    {
192        self.budget_report_cb = Some(Rc::new(std::cell::RefCell::new(cb)));
193        self
194    }
195
196    /// Installs a property map used for `${NAME}` interpolation in plain scalars.
197    ///
198    /// This is the intended public API for the `properties` feature. It consumes the provided
199    /// [`HashMap`] and stores it in the internal shared representation used by nested
200    /// deserializers, so callers do not have to construct `Rc` or `Some(...)` manually.
201    ///
202    /// ```rust
203    /// # #[cfg(feature = "properties")]
204    /// # {
205    /// use std::collections::HashMap;
206    /// use serde_saphyr::Options;
207    ///
208    /// let mut properties = HashMap::new();
209    /// properties.insert("MODE".to_string(), "production".to_string());
210    ///
211    /// let options = Options::default().with_properties(properties);
212    /// # let _ = options;
213    /// # }
214    /// ```
215    #[cfg(feature = "properties")]
216    pub fn with_properties(mut self, properties: HashMap<String, String>) -> Self {
217        self.property_map = Some(Rc::new(properties));
218        self
219    }
220
221    /// Sets the include resolver callback to be used during parsing.
222    ///
223    /// This method is for advances use cases. If you just want to include files from the
224    /// filesystem, use [`Options::with_filesystem_root`] instead that will use [`crate::SafeFileResolver`]
225    ///
226    /// The callback is invoked each time the parser encounters a `!include` tag. It receives an
227    /// [`crate::input_source::IncludeRequest`] describing the requested include target, the source
228    /// that triggered it, the include stack, and the source location. The callback must then
229    /// return either a [`crate::ResolvedInclude`] or a [`crate::IncludeResolveError`].
230    ///
231    /// This is useful for virtual filesystems, embedded configuration bundles, network-backed
232    /// resolvers, or custom caching layers.
233    ///
234    /// ```rust
235    /// # #[cfg(feature = "include")]
236    /// # {
237    /// use serde::Deserialize;
238    /// use serde_saphyr::{
239    ///     from_str_with_options, options, IncludeRequest, IncludeResolveError, InputSource,
240    ///     ResolvedInclude,
241    /// };
242    ///
243    /// #[derive(Debug, Deserialize, PartialEq)]
244    /// struct Config {
245    ///     users: Vec<User>,
246    /// }
247    ///
248    /// #[derive(Debug, Deserialize, PartialEq)]
249    /// struct User {
250    ///     name: String,
251    /// }
252    ///
253    /// let root_yaml = "users: !include virtual://users.yaml\n";
254    /// let users_yaml = "- name: Alice\n- name: Bob\n";
255    ///
256    /// let options = options! {}.with_include_resolver(|req: IncludeRequest<'_>| {
257    ///     assert_eq!(req.spec, "virtual://users.yaml");
258    ///     assert_eq!(req.from_name, "<input>");
259    ///
260    ///     if req.spec == "virtual://users.yaml" {
261    ///         Ok(ResolvedInclude {
262    ///             id: req.spec.to_owned(),
263    ///             name: "virtual users".to_owned(),
264    ///             source: InputSource::from_string(users_yaml.to_owned()),
265    ///         })
266    ///     } else {
267    ///         Err(IncludeResolveError::Message(format!("unknown include: {}", req.spec)))
268    ///     }
269    /// });
270    ///
271    /// let config: Config = from_str_with_options(root_yaml, options).unwrap();
272    /// assert_eq!(config.users.len(), 2);
273    /// assert_eq!(config.users[0].name, "Alice");
274    /// # }
275    /// ```
276    #[cfg(feature = "include")]
277    pub fn with_include_resolver<F>(mut self, cb: F) -> Self
278    where
279        F: for<'res> FnMut(
280                crate::input_source::IncludeRequest<'res>,
281            )
282                -> Result<crate::ResolvedInclude, crate::IncludeResolveError>
283            + 'static,
284    {
285        self.include_resolver = Some(Rc::new(std::cell::RefCell::new(cb)));
286        self
287    }
288
289    /// Configures a [`crate::SafeFileResolver`] rooted at `path` for `!include` lookups.
290    ///
291    /// This is a convenience for:
292    ///
293    /// ```rust,no_run
294    /// # #[cfg(feature = "include_fs")]
295    /// # fn main() -> Result<(), std::io::Error> {
296    /// # use serde_saphyr::{options, SafeFileResolver};
297    /// let options = options! {}
298    ///     .with_include_resolver(SafeFileResolver::new("./configs")?.into_callback());
299    /// # let _ = options;
300    /// # Ok(())
301    /// # }
302    /// ```
303    ///
304    /// It enables filesystem-backed `!include` resolution while keeping every included file
305    /// confined to the canonicalized `path` root.
306    ///
307    /// ```rust,no_run
308    /// # #[cfg(feature = "include_fs")]
309    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
310    /// use serde::Deserialize;
311    /// use serde_saphyr::{from_str_with_options, options};
312    ///
313    /// #[derive(Debug, Deserialize)]
314    /// struct User {
315    ///     name: String,
316    /// }
317    ///
318    /// #[derive(Debug, Deserialize)]
319    /// struct Config {
320    ///     users: Vec<User>,
321    /// }
322    ///
323    /// let yaml = "users: !include#users value.yaml\n";
324    /// let options = options! {}.with_filesystem_root("./examples")?;
325    /// let config: Config = from_str_with_options(yaml, options)?;
326    /// # let _ = config;
327    /// # Ok(())
328    /// # }
329    /// ```
330    #[cfg(feature = "include_fs")]
331    pub fn with_filesystem_root<P>(self, path: P) -> io::Result<Self>
332    where
333        P: AsRef<Path>,
334    {
335        Ok(self.with_include_resolver(crate::SafeFileResolver::new(path)?.into_callback()))
336    }
337}
338
339impl Default for Options {
340    #[allow(deprecated)]
341    fn default() -> Self {
342        Self {
343            budget: Some(Budget::default()),
344            budget_report: None,
345            budget_report_cb: None,
346            duplicate_keys: DuplicateKeyPolicy::Error,
347            alias_limits: AliasLimits::default(),
348            legacy_octal_numbers: false,
349            strict_booleans: false,
350            angle_conversions: false,
351            ignore_binary_tag_for_string: false,
352            no_schema: false,
353            with_snippet: true,
354            crop_radius: 64,
355            require_indent: RequireIndent::Unchecked,
356
357            #[cfg(feature = "include")]
358            include_resolver: None,
359            #[cfg(feature = "properties")]
360            property_map: None,
361        }
362    }
363}
364
365impl std::fmt::Debug for Options {
366    #[allow(deprecated)]
367    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
368        f.debug_struct("Options")
369            .field("budget", &self.budget)
370            .field("budget_report", &self.budget_report)
371            .field(
372                "budget_report_cb",
373                &if self.budget_report_cb.is_some() {
374                    "set"
375                } else {
376                    "none"
377                },
378            )
379            .field("duplicate_keys", &self.duplicate_keys)
380            .field("alias_limits", &self.alias_limits)
381            .field("legacy_octal_numbers", &self.legacy_octal_numbers)
382            .field("strict_booleans", &self.strict_booleans)
383            .field(
384                "ignore_binary_tag_for_string",
385                &self.ignore_binary_tag_for_string,
386            )
387            .field("angle_conversions", &self.angle_conversions)
388            .field("no_schema", &self.no_schema)
389            .field("with_snippet", &self.with_snippet)
390            .field("crop_radius", &self.crop_radius)
391            .field("require_indent", &self.require_indent)
392            .field("include_resolver", &{
393                #[cfg(feature = "include")]
394                {
395                    if self.include_resolver.is_some() {
396                        "set"
397                    } else {
398                        "none"
399                    }
400                }
401                #[cfg(not(feature = "include"))]
402                {
403                    "disabled"
404                }
405            })
406            .field("property_map", &{
407                #[cfg(feature = "properties")]
408                {
409                    if self.property_map.is_some() {
410                        "set"
411                    } else {
412                        "none"
413                    }
414                }
415                #[cfg(not(feature = "properties"))]
416                {
417                    "disabled"
418                }
419            })
420            .finish()
421    }
422}
423
#[cfg(test)]
mod tests {
    use super::*;

    #[cfg(feature = "include_fs")]
    use crate::input_source::{IncludeRequest, InputSource};
    #[cfg(feature = "include_fs")]
    use std::path::PathBuf;
    #[cfg(feature = "include_fs")]
    use tempfile::tempdir;

    /// `Options::default()` yields the expected value for every field.
    #[test]
    fn test_options_default() {
        let defaults = Options::default();

        // Budget and report callbacks.
        assert!(defaults.budget.is_some());
        assert!(defaults.budget_report.is_none());
        assert!(defaults.budget_report_cb.is_none());

        // Policies and limits.
        assert!(matches!(defaults.duplicate_keys, DuplicateKeyPolicy::Error));
        assert_eq!(defaults.alias_limits.max_total_replayed_events, 1_000_000);
        assert_eq!(defaults.crop_radius, 64);
        assert_eq!(defaults.require_indent, RequireIndent::Unchecked);

        // Boolean switches: only snippet rendering is on.
        assert!(defaults.with_snippet);
        assert!(!defaults.legacy_octal_numbers);
        assert!(!defaults.strict_booleans);
        assert!(!defaults.ignore_binary_tag_for_string);
        assert!(!defaults.angle_conversions);
        assert!(!defaults.no_schema);

        // Feature-gated fields start out unset.
        #[cfg(feature = "include")]
        assert!(defaults.include_resolver.is_none());
        #[cfg(feature = "properties")]
        {
            assert!(defaults.property_map.is_none());
        }
    }

    /// The hand-written `Debug` impl reports callbacks as `"set"` / `"none"` and
    /// feature-gated fields as `"disabled"` when the feature is off.
    #[test]
    fn test_options_debug_format() {
        let plain = Options::default();
        let rendered = format!("{:?}", plain);
        assert!(rendered.contains("Options"));
        assert!(rendered.contains("budget"));
        assert!(rendered.contains("budget_report_cb: \"none\""));

        #[cfg(feature = "include")]
        assert!(rendered.contains("include_resolver: \"none\""));
        #[cfg(feature = "properties")]
        {
            assert!(rendered.contains("property_map: \"none\""));
        }
        #[cfg(not(feature = "properties"))]
        {
            assert!(rendered.contains("property_map: \"disabled\""));
        }

        // Once a callback is installed the same field flips to "set".
        let with_callback = plain.with_budget_report(|_| {});
        let rendered_with_callback = format!("{:?}", with_callback);
        assert!(rendered_with_callback.contains("budget_report_cb: \"set\""));
    }

    /// `with_properties` stores the supplied map in `property_map`.
    #[cfg(feature = "properties")]
    #[test]
    fn test_with_properties_sets_property_map() {
        let mut properties = std::collections::HashMap::new();
        properties.insert("MODE".to_string(), "production".to_string());

        let configured = Options::default().with_properties(properties);
        let stored = configured.property_map.as_deref().unwrap();

        assert_eq!(stored.get("MODE").map(String::as_str), Some("production"));
    }

    /// `AliasLimits::default()` carries the documented limits.
    #[test]
    fn test_alias_limits_default() {
        let AliasLimits {
            max_total_replayed_events,
            max_replay_stack_depth,
            max_alias_expansions_per_anchor,
        } = AliasLimits::default();

        assert_eq!(max_total_replayed_events, 1_000_000);
        assert_eq!(max_replay_stack_depth, 64);
        assert_eq!(max_alias_expansions_per_anchor, usize::MAX);
    }

    /// `with_filesystem_root` installs an include resolver.
    #[cfg(feature = "include_fs")]
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_with_filesystem_root_sets_include_resolver() {
        let current_dir = PathBuf::from(".");
        let configured = Options::default()
            .with_filesystem_root(&current_dir)
            .unwrap();
        assert!(configured.include_resolver.is_some());
    }

    /// The filesystem resolver hands back a reader-based source for a regular file.
    #[cfg(feature = "include_fs")]
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_with_filesystem_root_uses_reader_default_for_regular_files() {
        let sandbox = tempdir().unwrap();
        std::fs::write(sandbox.path().join("child.yaml"), "value: 1\n").unwrap();

        let configured = Options::default()
            .with_filesystem_root(sandbox.path())
            .unwrap();

        let request = IncludeRequest {
            spec: "child.yaml",
            from_name: "<input>",
            from_id: None,
            stack: vec!["<input>".to_string()],
            location: crate::Location::UNKNOWN,
            size_remaining: None,
        };

        let mut resolver = configured
            .include_resolver
            .as_ref()
            .expect("resolver set")
            .borrow_mut();
        let resolved = resolver(request).unwrap();

        assert!(matches!(resolved.source, InputSource::Reader(_)));
    }
}