serde_saphyr/lib.rs
1#![forbid(unsafe_code)]
2// Options structs expose their fields for now, but callers are expected to migrate to the
3// `options!` / `ser_options!` macros. The fields are deprecated to guide downstream
4// users, while this crate still legitimately reads them internally.
5#![allow(deprecated)]
//! The public serialization and deserialization API is defined at the crate root.
7
8pub use anchors::{
9 ArcAnchor, ArcRecursion, ArcRecursive, ArcWeakAnchor, RcAnchor, RcRecursion, RcRecursive,
10 RcWeakAnchor,
11};
12pub use de::{Budget, DuplicateKeyPolicy, Error, Options};
13pub use de_error::TransformReason;
14pub use location::{Location, Locations, Span};
15pub use long_strings::{FoldStr, FoldString, LitStr, LitString};
16pub use ser::{Commented, FlowMap, FlowSeq, SpaceAfter};
17pub use spanned::Spanned;
18
19use crate::budget::EnforcingPolicy;
20use crate::de::{Ev, Events};
21use crate::live_events::LiveEvents;
22use crate::parse_scalars::scalar_is_nullish;
23pub use crate::serializer_options::SerializerOptions;
24use serde::de::DeserializeOwned;
25use std::io::Read;
26
27#[cfg(feature = "garde")]
28use garde::Validate;
29#[cfg(feature = "validator")]
30use validator::Validate as ValidatorValidate;
31
32mod anchor_store;
33mod anchors;
34mod base64;
35pub mod budget;
36mod de;
37mod de_error;
38#[path = "de/snippet.rs"]
39mod de_snipped;
40mod live_events;
41mod long_strings;
42pub mod options;
43mod parse_scalars;
44pub mod ser;
45mod spanned;
46
47#[cfg(any(feature = "garde", feature = "validator"))]
48pub mod path_map;
49
50pub mod ser_error;
51
52pub use de::YamlDeserializer as Deserializer;
53pub use ser::YamlSerializer as Serializer;
54
55pub use de::{
56 with_deserializer_from_reader, with_deserializer_from_reader_with_options,
57 with_deserializer_from_slice, with_deserializer_from_slice_with_options,
58 with_deserializer_from_str, with_deserializer_from_str_with_options,
59};
60
61mod serializer_options;
62mod macros;
63mod tags;
64
65pub(crate) mod ser_quoting;
66
67mod buffered_input;
68mod location;
69#[cfg(feature = "robotics")]
70pub mod robotics;
71
72#[cfg(feature = "miette")]
73pub mod miette;
74
75#[cfg(feature = "figment")]
76pub mod figment;
77pub(crate) mod ring_reader;
78mod wrapping;
79mod zmij_format;
80// ---------------- Serialization (public API) ----------------
81
82/// Serialize a value to a YAML `String`.
83///
84/// This is the easiest entry point when you just want a YAML string.
85///
86/// Example
87///
88/// ```rust
89/// use serde::Serialize;
90///
91/// #[derive(Serialize)]
92/// struct Foo { a: i32, b: bool }
93///
94/// let s = serde_saphyr::to_string(&Foo { a: 1, b: true }).unwrap();
95/// assert!(s.contains("a: 1"));
96/// ```
97pub fn to_string<T: serde::Serialize>(value: &T) -> std::result::Result<String, crate::ser::Error> {
98 let mut out = String::new();
99 to_fmt_writer(&mut out, value)?;
100 Ok(out)
101}
102
103/// Serialize a value to a YAML `String`, with [`SerializerOptions`].
104///
105/// This is like [`to_string`], but lets you control formatting and serialization
106/// behavior through the provided `options`.
107///
108/// Example
109///
110/// ```rust
111/// use serde::Serialize;
112/// use serde_saphyr::SerializerOptions;
113///
114/// #[derive(Serialize)]
115/// struct Foo { a: i32, b: bool }
116///
117/// let options = SerializerOptions::default();
118/// let s = serde_saphyr::to_string_with_options(&Foo { a: 1, b: true }, options).unwrap();
119/// assert!(s.contains("a: 1"));
120/// ```
121pub fn to_string_with_options<T: serde::Serialize>(
122 value: &T,
123 options: SerializerOptions,
124) -> std::result::Result<String, crate::ser::Error> {
125 let mut out = String::new();
126 to_fmt_writer_with_options(&mut out, value, options)?;
127 Ok(out)
128}
129
130/// Deprecated: use `to_fmt_writer` or `to_io_writer`
131/// Kept for a transition release to avoid instant breakage.
132#[deprecated(
133 since = "0.0.7",
134 note = "Use `to_fmt_writer` for `fmt::Write` (String, fmt::Formatter) or `to_io_writer` for files/sockets."
135)]
136pub fn to_writer<W: std::fmt::Write, T: serde::Serialize>(
137 output: &mut W,
138 value: &T,
139) -> std::result::Result<(), crate::ser::Error> {
140 let mut ser = crate::ser::YamlSerializer::new(output);
141 value.serialize(&mut ser)
142}
143
144/// Serialize a value as YAML into any [`fmt::Write`] target.
145pub fn to_fmt_writer<W: std::fmt::Write, T: serde::Serialize>(
146 output: &mut W,
147 value: &T,
148) -> std::result::Result<(), crate::ser::Error> {
149 to_fmt_writer_with_options(output, value, SerializerOptions::default())
150}
151
152/// Serialize a value as YAML into any [`io::Write`] target.
153pub fn to_io_writer<W: std::io::Write, T: serde::Serialize>(
154 output: &mut W,
155 value: &T,
156) -> std::result::Result<(), crate::ser::Error> {
157 to_io_writer_with_options(output, value, SerializerOptions::default())
158}
159
160/// Serialize a value as YAML into any [`fmt::Write`] target, with options.
161/// Options are consumed because anchor generator may be taken from them.
162pub fn to_fmt_writer_with_options<W: std::fmt::Write, T: serde::Serialize>(
163 output: &mut W,
164 value: &T,
165 mut options: SerializerOptions,
166) -> std::result::Result<(), crate::ser::Error> {
167 options.consistent()?;
168 let mut ser = crate::ser::YamlSerializer::with_options(output, &mut options);
169 value.serialize(&mut ser)
170}
171
172/// Serialize a value as YAML into any [`io::Write`] target, with options.
173/// Options are consumed because anchor generator may be taken from them.
174pub fn to_io_writer_with_options<W: std::io::Write, T: serde::Serialize>(
175 output: &mut W,
176 value: &T,
177 mut options: SerializerOptions,
178) -> std::result::Result<(), crate::ser::Error> {
179 options.consistent()?;
180 struct Adapter<'a, W: std::io::Write> {
181 output: &'a mut W,
182 last_err: Option<std::io::Error>,
183 }
184 impl<'a, W: std::io::Write> std::fmt::Write for Adapter<'a, W> {
185 fn write_str(&mut self, s: &str) -> std::fmt::Result {
186 if let Err(e) = self.output.write_all(s.as_bytes()) {
187 self.last_err = Some(e);
188 return Err(std::fmt::Error);
189 }
190 Ok(())
191 }
192 fn write_char(&mut self, c: char) -> std::fmt::Result {
193 let mut buf = [0u8; 4];
194 let s = c.encode_utf8(&mut buf);
195 self.write_str(s)
196 }
197 }
198 let mut adapter = Adapter {
199 output,
200 last_err: None,
201 };
202 let mut ser = crate::ser::YamlSerializer::with_options(&mut adapter, &mut options);
203 match value.serialize(&mut ser) {
204 Ok(()) => Ok(()),
205 Err(e) => {
206 if let Some(io_error) = adapter.last_err.take() {
207 return Err(crate::ser::Error::from(io_error));
208 }
209 Err(e)
210 }
211 }
212}
213
214/// Deprecated: use `to_fmt_writer_with_options` for `fmt::Write` or `to_io_writer_with_options` for `io::Write`.
215#[deprecated(
216 since = "0.0.7",
217 note = "Use `to_fmt_writer_with_options` for fmt::Write or `to_io_writer_with_options` for io::Write."
218)]
219pub fn to_writer_with_options<W: std::fmt::Write, T: serde::Serialize>(
220 output: &mut W,
221 value: &T,
222 options: SerializerOptions,
223) -> std::result::Result<(), crate::ser::Error> {
224 to_fmt_writer_with_options(output, value, options)
225}
226
227/// Deserialize any `T: serde::de::Deserialize<'de>` directly from a YAML string.
228///
229/// This is the simplest entry point; it parses a single YAML document. If the
230/// input contains multiple documents, this returns an error advising to use
231/// [`from_multiple`] or [`from_multiple_with_options`].
232///
233/// This function supports both owned types (like `String`) and borrowed types
234/// (like `&str`). For borrowed types, the deserialized value's lifetime is tied
235/// to the input string's lifetime.
236///
237/// **Note**: Borrowing only works for simple plain scalars that don't require
238/// any transformation (no multi-line folding, no escape processing). For
239/// transformed strings, deserialization to `&str` will fail with a helpful
240/// error message suggesting to use `String` or `Cow<str>` instead.
241///
242/// Example: read a small `Config` structure from a YAML string.
243///
244/// ```rust
245/// use serde::Deserialize;
246///
247/// #[derive(Debug, Deserialize, PartialEq)]
248/// struct Config {
249/// name: String,
250/// enabled: bool,
251/// retries: i32,
252/// }
253///
254/// let yaml = r#"
255/// name: My Application
256/// enabled: true
257/// retries: 5
258/// "#;
259///
260/// let cfg: Config = serde_saphyr::from_str(yaml).unwrap();
261/// assert!(cfg.enabled);
262/// ```
263///
264/// Example: read a structure with borrowed string fields.
265///
266/// Borrowed strings are supported when deserializing from an in-memory input (`from_str` / `from_slice`),
267/// and only when the scalar exists verbatim in the input (i.e., no escape processing, folding, or other
268/// normalization is required). If the YAML scalar requires transformation, deserializing into `&str`
269/// fails with an error suggesting `String` or `Cow<str>`.
270///
271/// Note: reader-based entry points like [`from_reader`] require `DeserializeOwned` and therefore cannot
272/// return values that borrow from the input.
273///
274/// ```rust
275/// use serde::Deserialize;
276///
277/// #[derive(Debug, Deserialize, PartialEq)]
278/// struct Data<'a> {
279/// name: &'a str,
280/// value: i32,
281/// }
282///
283/// let yaml = "name: hello\nvalue: 42\n";
284///
285/// let data: Data = serde_saphyr::from_str(yaml).unwrap();
286/// assert_eq!(data.name, "hello");
287/// assert_eq!(data.value, 42);
288/// ```
289pub fn from_str<'de, T>(input: &'de str) -> Result<T, Error>
290where
291 T: serde::de::Deserialize<'de>,
292{
293 from_str_with_options(input, Options::default())
294}
295
296#[allow(deprecated)]
297fn from_str_with_options_impl<'de, T>(input: &'de str, options: Options) -> Result<T, Error>
298where
299 T: serde::de::Deserialize<'de>,
300{
301 // Normalize: ignore a single leading UTF-8 BOM if present.
302 let input = if let Some(rest) = input.strip_prefix('\u{FEFF}') {
303 rest
304 } else {
305 input
306 };
307
308 let with_snippet = options.with_snippet;
309 let crop_radius = options.crop_radius;
310
311 let cfg = crate::de::Cfg::from_options(&options);
312 // Do not stop at DocumentEnd; we'll probe for trailing content/errors explicitly.
313 let mut src = LiveEvents::from_str(
314 input,
315 options.budget,
316 options.budget_report,
317 options.budget_report_cb,
318 options.alias_limits,
319 false,
320 );
321 let value_res = crate::anchor_store::with_document_scope(|| {
322 T::deserialize(crate::de::YamlDeserializer::new(&mut src, cfg))
323 });
324 let value = match value_res {
325 Ok(v) => v,
326 Err(e) => {
327 if src.synthesized_null_emitted() {
328 let err = Error::eof().with_location(src.last_location());
329 return Err(maybe_with_snippet(err, input, with_snippet, crop_radius));
330 } else {
331 return Err(maybe_with_snippet(e, input, with_snippet, crop_radius));
332 }
333 }
334 };
335
336 match src.peek() {
337 Ok(Some(_)) => {
338 let err = Error::msg(
339 "multiple YAML documents detected; use from_multiple or from_multiple_with_options",
340 )
341 .with_location(src.last_location());
342 return Err(maybe_with_snippet(err, input, with_snippet, crop_radius));
343 }
344 Ok(None) => {}
345 Err(e) => {
346 if src.seen_doc_end() {
347 // Trailing garbage after a proper document end marker is ignored.
348 } else {
349 return Err(maybe_with_snippet(e, input, with_snippet, crop_radius));
350 }
351 }
352 }
353
354 src.finish()
355 .map_err(|e| maybe_with_snippet(e, input, with_snippet, crop_radius))?;
356 Ok(value)
357}
358
359/// Deserialize a single YAML document with configurable [`Options`].
360///
361/// This function supports both owned types (like `String`) and borrowed types
362/// (like `&str`). For borrowed types, the deserialized value's lifetime is tied
363/// to the input string's lifetime.
364///
365/// Example: read a small `Config` with a custom budget and default duplicate-key policy.
366///
367/// ```rust
368/// use serde::Deserialize;
369/// use serde_saphyr::DuplicateKeyPolicy;
370///
371/// #[derive(Debug, Deserialize, PartialEq)]
372/// struct Config {
373/// name: String,
374/// enabled: bool,
375/// retries: i32,
376/// }
377///
378/// let yaml = r#"
379/// name: My Application
380/// enabled: true
381/// retries: 5
382/// "#;
383///
384/// let options = serde_saphyr::options! {
385/// budget: serde_saphyr::budget! {
386/// max_anchors: 200,
387/// },
388/// duplicate_keys: DuplicateKeyPolicy::FirstWins,
389/// };
390/// let cfg: Config = serde_saphyr::from_str_with_options(yaml, options).unwrap();
391/// assert_eq!(cfg.retries, 5);
392/// ```
393#[allow(deprecated)]
394pub fn from_str_with_options<'de, T>(input: &'de str, options: Options) -> Result<T, Error>
395where
396 T: serde::de::Deserialize<'de>,
397{
398 from_str_with_options_impl(input, options)
399}
400
/// Deserialize one YAML document with configurable [`Options`] and also return the
/// `PathRecorder` that was filled during deserialization.
///
/// Callers in this file use the recorder's `map` as the `locations` of validation
/// errors. The flow mirrors the plain string entry point: a single leading BOM is
/// dropped, one document is deserialized inside an anchor-store document scope,
/// further content is probed, and the event source is finished. Every error
/// returned from here is passed through `maybe_with_snippet` with the BOM-less input.
#[cfg(any(feature = "garde", feature = "validator"))]
#[allow(deprecated)]
fn from_str_with_options_and_path_recorder<T: DeserializeOwned>(
    input: &str,
    options: Options,
) -> Result<(T, crate::path_map::PathRecorder), Error> {
    // Normalize: ignore a single leading UTF-8 BOM if present.
    let input = input.strip_prefix('\u{FEFF}').unwrap_or(input);

    let with_snippet = options.with_snippet;
    let crop_radius = options.crop_radius;
    // Single place that attaches the snippet settings to an error.
    let decorate = |err: Error| maybe_with_snippet(err, input, with_snippet, crop_radius);

    let cfg = crate::de::Cfg::from_options(&options);
    let mut src = LiveEvents::from_str(
        input,
        options.budget,
        options.budget_report,
        options.budget_report_cb,
        options.alias_limits,
        false,
    );

    let mut recorder = crate::path_map::PathRecorder::new();

    let parsed = crate::anchor_store::with_document_scope(|| {
        T::deserialize(crate::de::YamlDeserializer::new_with_path_recorder(
            &mut src,
            cfg,
            &mut recorder,
        ))
    });
    let value = match parsed {
        Ok(v) => v,
        Err(e) => {
            // When a synthesized null was emitted, report EOF at the last
            // location instead of the deserializer's own error.
            let reported = if src.synthesized_null_emitted() {
                Error::eof().with_location(src.last_location())
            } else {
                e
            };
            return Err(decorate(reported));
        }
    };

    match src.peek() {
        Ok(None) => {}
        Ok(Some(_)) => {
            let err = Error::msg(
                "multiple YAML documents detected; use from_multiple or from_multiple_with_options",
            )
            .with_location(src.last_location());
            return Err(decorate(err));
        }
        Err(e) => {
            // Trailing garbage after a document end marker is ignored.
            if !src.seen_doc_end() {
                return Err(decorate(e));
            }
        }
    }

    src.finish().map_err(decorate)?;

    Ok((value, recorder))
}
473
/// Deserialize one YAML document from a string and validate it with `garde`,
/// using default [`Options`].
///
/// The error message will contain a snippet with exact location information, and if
/// the invalid value comes from an anchor, serde-saphyr will also tell where it is defined.
#[cfg(feature = "garde")]
pub fn from_str_valid<T>(input: &str) -> Result<T, Error>
where
    T: DeserializeOwned + garde::Validate,
    <T as garde::Validate>::Context: Default,
{
    let defaults = Options::default();
    from_str_with_options_valid(input, defaults)
}
485
/// Deserialize a single YAML document with configurable [`Options`] and validate it with `garde`.
///
/// The error message will contain a snippet with exact location information, and if the
/// invalid value comes from an anchor, serde-saphyr will also tell where it is defined.
///
/// # Errors
///
/// Returns the deserialization error if parsing fails, or `Error::ValidationError`
/// (carrying the `garde` report and the recorded locations) if validation fails.
#[cfg(feature = "garde")]
pub fn from_str_with_options_valid<T>(input: &str, options: Options) -> Result<T, Error>
where
    T: DeserializeOwned + garde::Validate,
    <T as garde::Validate>::Context: Default,
{
    let with_snippet = options.with_snippet;
    let crop_radius = options.crop_radius;
    // The deserialization helper drops one leading UTF-8 BOM before parsing, so the
    // recorded locations refer to the BOM-less text. Render the validation snippet
    // against that same text to keep the snippet and the locations aligned.
    let snippet_source = input.strip_prefix('\u{FEFF}').unwrap_or(input);

    let (v, recorder) = from_str_with_options_and_path_recorder::<T>(input, options)?;
    match Validate::validate(&v) {
        Ok(()) => Ok(v),
        Err(report) => {
            let err = Error::ValidationError {
                report,
                locations: recorder.map,
            };
            Err(maybe_with_snippet(
                err,
                snippet_source,
                with_snippet,
                crop_radius,
            ))
        }
    }
}
510
/// Deserialize a single YAML document with configurable [`Options`] and validate it with
/// `garde`, using the caller-supplied validation context
/// (`<T as garde::Validate>::Context`).
///
/// The error message will contain a snippet with exact location information, and if the
/// invalid value comes from an anchor, serde-saphyr will also tell where it is defined.
///
/// # Errors
///
/// Returns the deserialization error if parsing fails, or `Error::ValidationError`
/// (carrying the `garde` report and the recorded locations) if validation fails.
#[cfg(feature = "garde")]
pub fn from_str_with_options_context_valid<T>(
    input: &str,
    options: Options,
    context: &<T as garde::Validate>::Context,
) -> Result<T, Error>
where
    T: DeserializeOwned + garde::Validate,
{
    let with_snippet = options.with_snippet;
    let crop_radius = options.crop_radius;
    // The deserialization helper drops one leading UTF-8 BOM before parsing, so the
    // recorded locations refer to the BOM-less text. Render the validation snippet
    // against that same text to keep the snippet and the locations aligned.
    let snippet_source = input.strip_prefix('\u{FEFF}').unwrap_or(input);

    let (v, recorder) = from_str_with_options_and_path_recorder::<T>(input, options)?;
    match Validate::validate_with(&v, context) {
        Ok(()) => Ok(v),
        Err(report) => {
            let err = Error::ValidationError {
                report,
                locations: recorder.map,
            };
            Err(maybe_with_snippet(
                err,
                snippet_source,
                with_snippet,
                crop_radius,
            ))
        }
    }
}
538
/// Deserialize several YAML documents from a string and validate each one with
/// `garde`, using default [`Options`].
///
/// The error message will contain a snippet with exact location information, and if
/// the invalid value comes from an anchor, serde-saphyr will also tell where it is defined.
#[cfg(feature = "garde")]
pub fn from_multiple_valid<T: DeserializeOwned + garde::Validate>(
    input: &str,
) -> Result<Vec<T>, Error>
where
    <T as garde::Validate>::Context: Default,
{
    let defaults = Options::default();
    from_multiple_with_options_valid(input, defaults)
}
551
/// Deserialize multiple YAML documents with configurable [`Options`] and validate each with `garde`.
///
/// The error message will contain a snippet with exact location information, and if the
/// invalid value comes from an anchor, serde-saphyr will also tell where it is defined.
///
/// Documents that consist only of a null-like scalar are skipped. Validation failures
/// do not stop processing: they are collected and, if any occurred, returned together
/// as `Error::ValidationErrors`.
///
/// # Errors
///
/// * The first scan or deserialization error aborts processing and is returned.
/// * `Error::ValidationErrors` if one or more documents failed validation.
#[cfg(feature = "garde")]
#[allow(deprecated)]
pub fn from_multiple_with_options_valid<T>(input: &str, options: Options) -> Result<Vec<T>, Error>
where
    T: DeserializeOwned + garde::Validate,
    <T as garde::Validate>::Context: Default,
{
    let with_snippet = options.with_snippet;
    let crop_radius = options.crop_radius;

    let cfg = crate::de::Cfg::from_options(&options);
    let mut src = LiveEvents::from_str(
        input,
        options.budget,
        options.budget_report,
        options.budget_report_cb,
        options.alias_limits,
        false,
    );
    let mut values = Vec::new();
    let mut validation_errors: Vec<Error> = Vec::new();

    loop {
        // Scan errors surfaced while peeking get the same snippet treatment as
        // deserialization errors, so all failures from this function look alike.
        match src
            .peek()
            .map_err(|e| maybe_with_snippet(e, input, with_snippet, crop_radius))?
        {
            // Skip documents that are explicit null-like scalars ("", "~", or "null").
            Some(Ev::Scalar {
                value: s, style, ..
            }) if scalar_is_nullish(s, style) => {
                // Consume the null scalar document.
                let _ = src
                    .next()
                    .map_err(|e| maybe_with_snippet(e, input, with_snippet, crop_radius))?;
                continue;
            }
            Some(_) => {
                let mut recorder = crate::path_map::PathRecorder::new();
                let value_res = crate::anchor_store::with_document_scope(|| {
                    T::deserialize(crate::de::YamlDeserializer::new_with_path_recorder(
                        &mut src,
                        cfg,
                        &mut recorder,
                    ))
                });
                let value = match value_res {
                    Ok(v) => v,
                    Err(e) => return Err(maybe_with_snippet(e, input, with_snippet, crop_radius)),
                };

                match Validate::validate(&value) {
                    Ok(()) => {
                        values.push(value);
                    }
                    Err(report) => {
                        // Keep going: validation problems are reported in bulk at the end.
                        let err = Error::ValidationError {
                            report,
                            locations: recorder.map,
                        };
                        validation_errors.push(maybe_with_snippet(
                            err,
                            input,
                            with_snippet,
                            crop_radius,
                        ));
                    }
                }
            }
            None => break,
        }
    }

    src.finish()
        .map_err(|e| maybe_with_snippet(e, input, with_snippet, crop_radius))?;

    if validation_errors.is_empty() {
        Ok(values)
    } else {
        Err(Error::ValidationErrors {
            errors: validation_errors,
        })
    }
}
633
/// Deserialize one YAML document from bytes and validate it with `garde`,
/// using default [`Options`].
///
/// The error message will contain a snippet with exact location information, and if
/// the invalid value comes from an anchor, serde-saphyr will also tell where it is defined.
#[cfg(feature = "garde")]
pub fn from_slice_valid<T: DeserializeOwned + garde::Validate>(bytes: &[u8]) -> Result<T, Error>
where
    <T as garde::Validate>::Context: Default,
{
    let defaults = Options::default();
    from_slice_with_options_valid(bytes, defaults)
}
644
/// Deserialize one YAML document from bytes with configurable [`Options`] and
/// validate it with `garde`.
///
/// The bytes must be valid UTF-8; otherwise an error with the message
/// "input is not valid UTF-8" is returned. The decoded text is then handed to
/// [`from_str_with_options_valid`].
///
/// The error message will contain a snippet with exact location information, and if
/// the invalid value comes from an anchor, serde-saphyr will also tell where it is defined.
#[cfg(feature = "garde")]
pub fn from_slice_with_options_valid<T: DeserializeOwned + garde::Validate>(
    bytes: &[u8],
    options: Options,
) -> Result<T, Error>
where
    <T as garde::Validate>::Context: Default,
{
    match std::str::from_utf8(bytes) {
        Ok(text) => from_str_with_options_valid(text, options),
        Err(_) => Err(Error::msg("input is not valid UTF-8")),
    }
}
659
/// Deserialize several YAML documents from bytes with [`Options`] and validate each
/// one with `garde`.
///
/// The bytes must be valid UTF-8; otherwise an error with the message
/// "input is not valid UTF-8" is returned. The decoded text is then handed to
/// [`from_multiple_with_options_valid`].
///
/// The error message will contain a snippet with exact location information, and if
/// the invalid value comes from an anchor, serde-saphyr will also tell where it is defined.
#[cfg(feature = "garde")]
pub fn from_slice_multiple_with_options_valid<T>(
    bytes: &[u8],
    options: Options,
) -> Result<Vec<T>, Error>
where
    T: DeserializeOwned + garde::Validate,
    <T as garde::Validate>::Context: Default,
{
    match std::str::from_utf8(bytes) {
        Ok(text) => from_multiple_with_options_valid(text, options),
        Err(_) => Err(Error::msg("input is not valid UTF-8")),
    }
}
675
/// Deserialize one YAML document from a reader and validate it with `garde`,
/// using default [`Options`].
///
/// As there is no access to the full text of the document, the error message will
/// not contain a snippet.
#[cfg(feature = "garde")]
pub fn from_reader_valid<R: std::io::Read, T>(reader: R) -> Result<T, Error>
where
    T: DeserializeOwned + garde::Validate,
    <T as garde::Validate>::Context: Default,
{
    let defaults = Options::default();
    from_reader_with_options_valid(reader, defaults)
}
687
/// Deserialize one YAML document from a reader with [`Options`] and validate it
/// with `garde`.
///
/// As there is no access to the full text of the document, the error message will
/// not contain a snippet.
///
/// Order of checks: the document is deserialized, then validated, and only then is
/// the input probed for further documents or trailing content.
#[cfg(feature = "garde")]
#[allow(deprecated)]
pub fn from_reader_with_options_valid<R: std::io::Read, T>(
    reader: R,
    options: Options,
) -> Result<T, Error>
where
    T: DeserializeOwned + garde::Validate,
    <T as garde::Validate>::Context: Default,
{
    let cfg = crate::de::Cfg::from_options(&options);
    let mut src = LiveEvents::from_reader(
        reader,
        options.budget,
        options.budget_report,
        options.budget_report_cb,
        options.alias_limits,
        false,
        EnforcingPolicy::AllContent,
    );

    let mut recorder = crate::path_map::PathRecorder::new();

    let parsed = crate::anchor_store::with_document_scope(|| {
        T::deserialize(crate::de::YamlDeserializer::new_with_path_recorder(
            &mut src,
            cfg,
            &mut recorder,
        ))
    });
    let value = match parsed {
        Ok(v) => v,
        Err(e) => {
            // If the only thing in the input was an empty document (synthetic null),
            // surface this as an EOF error to preserve expected error semantics
            // for incompatible target types (e.g., bool).
            return Err(if src.synthesized_null_emitted() {
                Error::eof().with_location(src.last_location())
            } else {
                e
            });
        }
    };

    Validate::validate(&value).map_err(|report| Error::ValidationError {
        report,
        locations: recorder.map,
    })?;

    // With the first document done, look ahead for another document/content or
    // trailing garbage. A scan error is ignored when a DocumentEnd ("...") was
    // already seen; otherwise it is surfaced.
    match src.peek() {
        Ok(None) => {}
        Ok(Some(_)) => {
            return Err(Error::msg(
                "multiple YAML documents detected; use read_valid or read_with_options_valid to obtain the iterator",
            )
            .with_location(src.last_location()));
        }
        Err(e) => {
            // Trailing garbage after a proper document end marker is ignored.
            if !src.seen_doc_end() {
                return Err(e);
            }
        }
    }

    src.finish()?;
    Ok(value)
}
765
/// Build an iterator over `garde`-validated YAML documents read from `reader`,
/// using default [`Options`].
///
/// As there is no access to the full text of the document, the error message will
/// not contain a snippet.
#[cfg(feature = "garde")]
pub fn read_valid<'a, R, T>(reader: &'a mut R) -> impl Iterator<Item = Result<T, Error>> + 'a
where
    R: Read + 'a,
    T: DeserializeOwned + garde::Validate + 'a,
    <T as garde::Validate>::Context: Default,
{
    let defaults = Options::default();
    read_with_options_valid(reader, defaults)
}
778
/// Create an iterator over validated YAML documents from a reader with configurable options.
/// As there is no access to the full text of the document, the error message will not contain
/// a snippet.
///
/// Each call to `next` on the returned iterator yields one of:
/// * `Some(Ok(value))` for a document that deserialized and passed `garde` validation;
/// * `Some(Err(Error::ValidationError { .. }))` for a document that deserialized but
///   failed validation (iteration can continue afterwards);
/// * `Some(Err(e))` for a deserialization or scan error;
/// * `None` once the input is exhausted or the iterator has been marked finished.
///
/// Documents that consist only of a null-like scalar are skipped.
#[cfg(feature = "garde")]
#[allow(deprecated)]
pub fn read_with_options_valid<'a, R, T>(
    reader: &'a mut R,
    options: Options,
) -> impl Iterator<Item = Result<T, Error>> + 'a
where
    R: Read + 'a,
    T: DeserializeOwned + garde::Validate + 'a,
    <T as garde::Validate>::Context: Default,
{
    // Private iterator type; callers only see `impl Iterator`.
    struct ReadValidIter<'a, T> {
        src: LiveEvents<'a>, // borrows from `reader`
        cfg: crate::de::Cfg,
        // Set once the source is exhausted or an unrecoverable error was seen;
        // afterwards `next` always returns `None`.
        finished: bool,
        // Ties the iterator to the item type without storing a `T`.
        _marker: std::marker::PhantomData<T>,
    }

    impl<'a, T> Iterator for ReadValidIter<'a, T>
    where
        T: DeserializeOwned + garde::Validate + 'a,
        <T as garde::Validate>::Context: Default,
    {
        type Item = Result<T, Error>;

        fn next(&mut self) -> Option<Self::Item> {
            if self.finished {
                return None;
            }
            loop {
                match self.src.peek() {
                    // Null-like scalar documents are consumed and skipped.
                    Ok(Some(Ev::Scalar { value, style, .. }))
                        if scalar_is_nullish(value, style) =>
                    {
                        // NOTE(review): an error from `next()` is discarded here;
                        // presumably it cannot fail right after a successful peek — confirm.
                        let _ = self.src.next();
                        continue;
                    }
                    Ok(Some(_)) => {
                        // Fresh recorder per document so locations do not leak across documents.
                        let mut recorder = crate::path_map::PathRecorder::new();
                        let value_res = crate::anchor_store::with_document_scope(|| {
                            T::deserialize(crate::de::YamlDeserializer::new_with_path_recorder(
                                &mut self.src,
                                self.cfg,
                                &mut recorder,
                            ))
                        });
                        let value = match value_res {
                            Ok(v) => v,
                            Err(e) => {
                                // After a deserialization error, skip remaining events in the
                                // current document and try to recover at the next document boundary.
                                if !self.src.skip_to_next_document() {
                                    self.finished = true;
                                }
                                return Some(Err(e));
                            }
                        };

                        match Validate::validate(&value) {
                            Ok(()) => return Some(Ok(value)),
                            Err(report) => {
                                // Validation errors occur after successful deserialization,
                                // so the parser is already at the document boundary.
                                // No need to skip or mark as finished - continue to next document.
                                return Some(Err(Error::ValidationError {
                                    report,
                                    locations: recorder.map,
                                }));
                            }
                        }
                    }
                    Ok(None) => {
                        // End of input: finish the source and report its error, if any, once.
                        self.finished = true;
                        if let Err(e) = self.src.finish() {
                            return Some(Err(e));
                        }
                        return None;
                    }
                    Err(e) => {
                        // Scan error: stop iterating. The peek error is reported;
                        // any error from `finish()` is deliberately dropped.
                        self.finished = true;
                        let _ = self.src.finish();
                        return Some(Err(e));
                    }
                }
            }
        }
    }

    let cfg = crate::de::Cfg::from_options(&options);
    // Unlike the single-document reader entry point, the source is created with
    // `EnforcingPolicy::PerDocument`.
    let src = LiveEvents::from_reader(
        reader,
        options.budget,
        options.budget_report,
        options.budget_report_cb,
        options.alias_limits,
        false,
        EnforcingPolicy::PerDocument,
    );

    ReadValidIter::<T> {
        src,
        cfg,
        finished: false,
        _marker: std::marker::PhantomData,
    }
}
888
/// Deserialize one YAML document from a string and validate it with `validator`,
/// using default [`Options`].
///
/// The error message will contain a snippet with exact location information, and if
/// the invalid value comes from an anchor, serde-saphyr will also tell where it is defined.
#[cfg(feature = "validator")]
pub fn from_str_validate<T>(input: &str) -> Result<T, Error>
where
    T: DeserializeOwned + ValidatorValidate,
{
    let defaults = Options::default();
    from_str_with_options_validate(input, defaults)
}
899
/// Deserialize a single YAML document with configurable [`Options`] and validate it with `validator`.
///
/// The error message will contain a snippet with exact location information, and if the
/// invalid value comes from an anchor, serde-saphyr will also tell where it is defined.
///
/// # Errors
///
/// Returns the deserialization error if parsing fails, or `Error::ValidatorError`
/// (carrying the `validator` errors and the recorded locations) if validation fails.
#[cfg(feature = "validator")]
pub fn from_str_with_options_validate<T>(input: &str, options: Options) -> Result<T, Error>
where
    T: DeserializeOwned + ValidatorValidate,
{
    let with_snippet = options.with_snippet;
    let crop_radius = options.crop_radius;
    // The deserialization helper drops one leading UTF-8 BOM before parsing, so the
    // recorded locations refer to the BOM-less text. Render the validation snippet
    // against that same text to keep the snippet and the locations aligned.
    let snippet_source = input.strip_prefix('\u{FEFF}').unwrap_or(input);

    let (v, recorder) = from_str_with_options_and_path_recorder::<T>(input, options)?;
    match ValidatorValidate::validate(&v) {
        Ok(()) => Ok(v),
        Err(errors) => {
            let err = Error::ValidatorError {
                errors,
                locations: recorder.map,
            };
            Err(maybe_with_snippet(
                err,
                snippet_source,
                with_snippet,
                crop_radius,
            ))
        }
    }
}
923
/// Deserialize several YAML documents from a string and validate each one with
/// `validator`, using default [`Options`].
///
/// The error message will contain a snippet with exact location information, and if
/// the invalid value comes from an anchor, serde-saphyr will also tell where it is defined.
#[cfg(feature = "validator")]
pub fn from_multiple_validate<T: DeserializeOwned + ValidatorValidate>(
    input: &str,
) -> Result<Vec<T>, Error> {
    let defaults = Options::default();
    from_multiple_with_options_validate(input, defaults)
}
933
/// Deserialize multiple YAML documents with configurable [`Options`] and validate each with `validator`.
///
/// The error message will contain a snippet with exact location information, and if the
/// invalid value comes from an anchor, serde-saphyr will also tell where it is defined.
///
/// Documents that consist only of a null-like scalar are skipped. Validation failures
/// do not stop processing: they are collected and, if any occurred, returned together
/// as `Error::ValidatorErrors`.
///
/// # Errors
///
/// * The first scan or deserialization error aborts processing and is returned.
/// * `Error::ValidatorErrors` if one or more documents failed validation.
#[cfg(feature = "validator")]
#[allow(deprecated)]
pub fn from_multiple_with_options_validate<T>(
    input: &str,
    options: Options,
) -> Result<Vec<T>, Error>
where
    T: DeserializeOwned + ValidatorValidate,
{
    let with_snippet = options.with_snippet;
    let crop_radius = options.crop_radius;

    let cfg = crate::de::Cfg::from_options(&options);
    let mut src = LiveEvents::from_str(
        input,
        options.budget,
        options.budget_report,
        options.budget_report_cb,
        options.alias_limits,
        false,
    );
    let mut values = Vec::new();
    let mut validation_errors: Vec<Error> = Vec::new();

    loop {
        // Scan errors surfaced while peeking get the same snippet treatment as
        // deserialization errors, so all failures from this function look alike.
        match src
            .peek()
            .map_err(|e| maybe_with_snippet(e, input, with_snippet, crop_radius))?
        {
            // Skip documents that are explicit null-like scalars ("", "~", or "null").
            Some(Ev::Scalar {
                value: s, style, ..
            }) if scalar_is_nullish(s, style) => {
                // Consume the null scalar document.
                let _ = src
                    .next()
                    .map_err(|e| maybe_with_snippet(e, input, with_snippet, crop_radius))?;
                continue;
            }
            Some(_) => {
                let mut recorder = crate::path_map::PathRecorder::new();
                let value_res = crate::anchor_store::with_document_scope(|| {
                    T::deserialize(crate::de::YamlDeserializer::new_with_path_recorder(
                        &mut src,
                        cfg,
                        &mut recorder,
                    ))
                });
                let value = match value_res {
                    Ok(v) => v,
                    Err(e) => return Err(maybe_with_snippet(e, input, with_snippet, crop_radius)),
                };

                match ValidatorValidate::validate(&value) {
                    Ok(()) => {
                        values.push(value);
                    }
                    Err(errors) => {
                        // Keep going: validation problems are reported in bulk at the end.
                        let err = Error::ValidatorError {
                            errors,
                            locations: recorder.map,
                        };
                        validation_errors.push(maybe_with_snippet(
                            err,
                            input,
                            with_snippet,
                            crop_radius,
                        ));
                    }
                }
            }
            None => break,
        }
    }

    src.finish()
        .map_err(|e| maybe_with_snippet(e, input, with_snippet, crop_radius))?;

    if validation_errors.is_empty() {
        Ok(values)
    } else {
        Err(Error::ValidatorErrors {
            errors: validation_errors,
        })
    }
}
1017
/// Deserialize a single YAML document from a byte slice using the default [`Options`]
/// and validate it with `validator`.
/// The error message will contain a snippet with exact location information, and if the
/// invalid value comes from anchor, serde-saphyr will also tell where it is defined.
#[cfg(feature = "validator")]
pub fn from_slice_validate<T: DeserializeOwned + ValidatorValidate>(
    bytes: &[u8],
) -> Result<T, Error> {
    let options = Options::default();
    from_slice_with_options_validate(bytes, options)
}
1027
/// Deserialize a single YAML document from bytes with configurable [`Options`] and validate
/// it with `validator`. Input that is not valid UTF-8 is rejected before any parsing happens.
/// The error message will contain a snippet with exact location information, and if the
/// invalid value comes from anchor, serde-saphyr will also tell where it is defined.
#[cfg(feature = "validator")]
pub fn from_slice_with_options_validate<T: DeserializeOwned + ValidatorValidate>(
    bytes: &[u8],
    options: Options,
) -> Result<T, Error> {
    match std::str::from_utf8(bytes) {
        Ok(text) => from_str_with_options_validate(text, options),
        Err(_) => Err(Error::msg("input is not valid UTF-8")),
    }
}
1039
/// Deserialize multiple YAML documents from bytes with options and validate each with `validator`.
/// Input that is not valid UTF-8 is rejected before any parsing happens.
/// The error message will contain a snippet with exact location information, and if the
/// invalid value comes from anchor, serde-saphyr will also tell where it is defined.
#[cfg(feature = "validator")]
pub fn from_slice_multiple_with_options_validate<T>(
    bytes: &[u8],
    options: Options,
) -> Result<Vec<T>, Error>
where
    T: DeserializeOwned + ValidatorValidate,
{
    match std::str::from_utf8(bytes) {
        Ok(text) => from_multiple_with_options_validate(text, options),
        Err(_) => Err(Error::msg("input is not valid UTF-8")),
    }
}
1054
/// Deserialize a single YAML document from a reader using the default [`Options`] and
/// validate it with `validator`.
/// As there is no access to the full text of the document, the error message will not contain
/// a snippet.
#[cfg(feature = "validator")]
pub fn from_reader_validate<R: std::io::Read, T>(reader: R) -> Result<T, Error>
where
    T: DeserializeOwned + ValidatorValidate,
{
    let options = Options::default();
    from_reader_with_options_validate(reader, options)
}
1065
/// Deserialize a single YAML document from a reader with options and validate it with `validator`.
///
/// Fails if the reader holds more than one document. An input whose only content is an empty
/// document is reported as an EOF error when the target type cannot accept it.
///
/// As there is no access to the full text of the document, the error message will not contain
/// a snippet.
#[cfg(feature = "validator")]
#[allow(deprecated)]
pub fn from_reader_with_options_validate<R: std::io::Read, T>(
    reader: R,
    options: Options,
) -> Result<T, Error>
where
    T: DeserializeOwned + ValidatorValidate,
{
    let cfg = crate::de::Cfg::from_options(&options);
    let mut events = LiveEvents::from_reader(
        reader,
        options.budget,
        options.budget_report,
        options.budget_report_cb,
        options.alias_limits,
        false,
        EnforcingPolicy::AllContent,
    );

    // Records the paths visited during deserialization so that validation
    // errors can later be tied back to source locations.
    let mut path_recorder = crate::path_map::PathRecorder::new();

    let parsed = crate::anchor_store::with_document_scope(|| {
        let deserializer = crate::de::YamlDeserializer::new_with_path_recorder(
            &mut events,
            cfg,
            &mut path_recorder,
        );
        T::deserialize(deserializer)
    });
    let value = match parsed {
        Ok(value) => value,
        // If the only thing in the input was an empty document (synthetic null),
        // surface this as an EOF error to preserve expected error semantics
        // for incompatible target types (e.g., bool).
        Err(_) if events.synthesized_null_emitted() => {
            return Err(Error::eof().with_location(events.last_location()));
        }
        Err(err) => return Err(err),
    };

    match ValidatorValidate::validate(&value) {
        Ok(()) => {}
        Err(errors) => {
            return Err(Error::ValidatorError {
                errors,
                locations: path_recorder.map,
            });
        }
    }

    // With the first document done, look ahead: anything further is either a second
    // document (an error for this API) or trailing garbage. A scan error is tolerated
    // only when a DocumentEnd ("...") marker has already been seen.
    match events.peek() {
        Ok(None) => {}
        Ok(Some(_)) => {
            return Err(Error::msg(
                "multiple YAML documents detected; use read_validate or read_with_options_validate to obtain the iterator",
            )
            .with_location(events.last_location()));
        }
        Err(err) => {
            if !events.seen_doc_end() {
                return Err(err);
            }
            // Trailing garbage after a proper document end marker is ignored.
        }
    }

    events.finish()?;
    Ok(value)
}
1142
/// Create an iterator over validated YAML documents from a reader, using the default
/// [`Options`].
/// As there is no access to the full text of the document, the error message will not contain
/// a snippet.
#[cfg(feature = "validator")]
pub fn read_validate<'a, R, T>(reader: &'a mut R) -> impl Iterator<Item = Result<T, Error>> + 'a
where
    R: Read + 'a,
    T: DeserializeOwned + ValidatorValidate + 'a,
{
    let options = Options::default();
    read_with_options_validate(reader, options)
}
1154
/// Create an iterator over validated YAML documents from a reader with configurable options.
///
/// Each item is one document that was deserialized and then validated with `validator`.
/// Null-like documents are skipped. A validation failure is yielded as an error item and
/// iteration carries on with the following document.
///
/// As there is no access to the full text of the document, the error message will not contain
/// a snippet.
#[cfg(feature = "validator")]
#[allow(deprecated)]
pub fn read_with_options_validate<'a, R, T>(
    reader: &'a mut R,
    options: Options,
) -> impl Iterator<Item = Result<T, Error>> + 'a
where
    R: Read + 'a,
    T: DeserializeOwned + ValidatorValidate + 'a,
{
    /// Streaming state: the event source, the deserializer configuration and an end flag.
    struct ValidatedDocs<'a, T> {
        events: LiveEvents<'a>, // borrows from `reader`
        cfg: crate::de::Cfg,
        done: bool,
        _target: std::marker::PhantomData<T>,
    }

    impl<'a, T> Iterator for ValidatedDocs<'a, T>
    where
        T: DeserializeOwned + ValidatorValidate + 'a,
    {
        type Item = Result<T, Error>;

        fn next(&mut self) -> Option<Self::Item> {
            // Every arm except the null-document skip returns, so the loop only
            // repeats while null-like documents are being discarded.
            while !self.done {
                match self.events.peek() {
                    Ok(Some(Ev::Scalar { value, style, .. }))
                        if scalar_is_nullish(value, style) =>
                    {
                        let _ = self.events.next();
                    }
                    Ok(Some(_)) => {
                        let mut path_recorder = crate::path_map::PathRecorder::new();
                        let parsed = crate::anchor_store::with_document_scope(|| {
                            T::deserialize(crate::de::YamlDeserializer::new_with_path_recorder(
                                &mut self.events,
                                self.cfg,
                                &mut path_recorder,
                            ))
                        });
                        let value = match parsed {
                            Ok(value) => value,
                            Err(err) => {
                                // After a deserialization error, skip remaining events in the
                                // current document and try to recover at the next document
                                // boundary; stop for good when there is none.
                                let recovered = self.events.skip_to_next_document();
                                if !recovered {
                                    self.done = true;
                                }
                                return Some(Err(err));
                            }
                        };

                        // Validation runs after a successful deserialization, so the parser
                        // already sits at the document boundary: nothing to skip and no
                        // reason to stop iterating.
                        return Some(match ValidatorValidate::validate(&value) {
                            Ok(()) => Ok(value),
                            Err(errors) => Err(Error::ValidatorError {
                                errors,
                                locations: path_recorder.map,
                            }),
                        });
                    }
                    Ok(None) => {
                        self.done = true;
                        return match self.events.finish() {
                            Err(err) => Some(Err(err)),
                            Ok(_) => None,
                        };
                    }
                    Err(err) => {
                        self.done = true;
                        let _ = self.events.finish();
                        return Some(Err(err));
                    }
                }
            }
            None
        }
    }

    let cfg = crate::de::Cfg::from_options(&options);
    let events = LiveEvents::from_reader(
        reader,
        options.budget,
        options.budget_report,
        options.budget_report_cb,
        options.alias_limits,
        false,
        EnforcingPolicy::PerDocument,
    );

    ValidatedDocs::<T> {
        events,
        cfg,
        done: false,
        _target: std::marker::PhantomData,
    }
}
1262
1263pub(crate) fn maybe_with_snippet(
1264 err: Error,
1265 input: &str,
1266 with_snippet: bool,
1267 crop_radius: usize,
1268) -> Error {
1269 if with_snippet && crop_radius > 0 && err.location().is_some() {
1270 err.with_snippet(input, crop_radius)
1271 } else {
1272 err
1273 }
1274}
1275
1276/// Deserialize multiple YAML documents from a single string into a vector of `T`.
/// Completely empty documents are ignored and not included in the returned vector.
1278///
1279/// Example: read two `Config` documents separated by `---`.
1280///
1281/// ```rust
1282/// use serde::Deserialize;
1283///
1284/// #[derive(Debug, Deserialize, PartialEq)]
1285/// struct Config {
1286/// name: String,
1287/// enabled: bool,
1288/// retries: i32,
1289/// }
1290///
1291/// let yaml = r#"
1292/// name: First
1293/// enabled: true
1294/// retries: 1
1295/// ---
1296/// name: Second
1297/// enabled: false
1298/// retries: 2
1299/// "#;
1300///
1301/// let cfgs: Vec<Config> = serde_saphyr::from_multiple(yaml).unwrap();
1302/// assert_eq!(cfgs.len(), 2);
1303/// assert_eq!(cfgs[0].name, "First");
1304/// ```
1305pub fn from_multiple<T: DeserializeOwned>(input: &str) -> Result<Vec<T>, Error> {
1306 from_multiple_with_options(input, Options::default())
1307}
1308
1309/// Deserialize multiple YAML documents into a vector with configurable [`Options`].
1310///
1311/// Example: two `Config` documents with a custom budget.
1312///
1313/// ```rust
1314/// use serde::Deserialize;
1315/// use serde_saphyr::DuplicateKeyPolicy;
1316///
1317/// #[derive(Debug, Deserialize, PartialEq)]
1318/// struct Config {
1319/// name: String,
1320/// enabled: bool,
1321/// retries: i32,
1322/// }
1323///
1324/// let yaml = r#"
1325/// name: First
1326/// enabled: true
1327/// retries: 1
1328/// ---
1329/// name: Second
1330/// enabled: false
1331/// retries: 2
1332/// "#;
1333///
1334/// let options = serde_saphyr::options! {
1335/// budget: serde_saphyr::budget! {
1336/// max_anchors: 200,
1337/// },
1338/// duplicate_keys: DuplicateKeyPolicy::FirstWins,
1339/// };
1340/// let cfgs: Vec<Config> = serde_saphyr::from_multiple_with_options(yaml, options).unwrap();
1341/// assert_eq!(cfgs.len(), 2);
1342/// assert!(!cfgs[1].enabled);
1343/// ```
1344#[allow(deprecated)]
1345pub fn from_multiple_with_options<T: DeserializeOwned>(
1346 input: &str,
1347 options: Options,
1348) -> Result<Vec<T>, Error> {
1349 // Normalize: ignore a single leading UTF-8 BOM if present.
1350 let input = if let Some(rest) = input.strip_prefix('\u{FEFF}') {
1351 rest
1352 } else {
1353 input
1354 };
1355 let with_snippet = options.with_snippet;
1356 let crop_radius = options.crop_radius;
1357
1358 let cfg = crate::de::Cfg::from_options(&options);
1359 let mut src = LiveEvents::from_str(
1360 input,
1361 options.budget,
1362 options.budget_report,
1363 options.budget_report_cb,
1364 options.alias_limits,
1365 false,
1366 );
1367 let mut values = Vec::new();
1368
1369 loop {
1370 match src.peek()? {
1371 // Skip documents that are explicit null-like scalars ("", "~", or "null").
1372 Some(Ev::Scalar {
1373 value: s, style, ..
1374 }) if scalar_is_nullish(s, style) => {
1375 let _ = src.next()?; // consume the null scalar document
1376 // Do not push anything for this document; move to the next one.
1377 continue;
1378 }
1379 Some(_) => {
1380 let value_res = crate::anchor_store::with_document_scope(|| {
1381 T::deserialize(crate::de::YamlDeserializer::new(&mut src, cfg))
1382 });
1383 let value = match value_res {
1384 Ok(v) => v,
1385 Err(e) => return Err(maybe_with_snippet(e, input, with_snippet, crop_radius)),
1386 };
1387 values.push(value);
1388 }
1389 None => break,
1390 }
1391 }
1392
1393 src.finish()
1394 .map_err(|e| maybe_with_snippet(e, input, with_snippet, crop_radius))?;
1395 Ok(values)
1396}
1397
1398/// Deserialize a single YAML document from a UTF-8 byte slice.
1399///
1400/// This is equivalent to [`from_str`], but accepts `&[u8]` and validates it is
1401/// valid UTF-8 before parsing.
1402///
1403/// Example: read a small `Config` structure from bytes.
1404///
1405/// ```rust
1406/// use serde::Deserialize;
1407///
1408/// #[derive(Debug, Deserialize, PartialEq)]
1409/// struct Config {
1410/// name: String,
1411/// enabled: bool,
1412/// retries: i32,
1413/// }
1414///
1415/// let yaml = r#"
1416/// name: My Application
1417/// enabled: true
1418/// retries: 5
1419/// "#;
1420/// let bytes = yaml.as_bytes();
1421/// let cfg: Config = serde_saphyr::from_slice(bytes).unwrap();
1422/// assert!(cfg.enabled);
1423/// ```
1424///
1425pub fn from_slice<T: DeserializeOwned>(bytes: &[u8]) -> Result<T, Error> {
1426 from_slice_with_options(bytes, Options::default())
1427}
1428
1429/// Deserialize a single YAML document from a UTF-8 byte slice with configurable [`Options`].
1430///
1431/// Example: read a small `Config` with a custom budget from bytes.
1432///
1433/// ```rust
1434/// use serde::Deserialize;
1435/// use serde_saphyr::DuplicateKeyPolicy;
1436///
1437/// #[derive(Debug, Deserialize, PartialEq)]
1438/// struct Config {
1439/// name: String,
1440/// enabled: bool,
1441/// retries: i32,
1442/// }
1443///
1444/// let yaml = r#"
1445/// name: My Application
1446/// enabled: true
1447/// retries: 5
1448/// "#;
1449/// let bytes = yaml.as_bytes();
1450/// let options = serde_saphyr::options! {
1451/// budget: serde_saphyr::budget! {
1452/// max_anchors: 200,
1453/// },
1454/// duplicate_keys: DuplicateKeyPolicy::FirstWins,
1455/// };
1456/// let cfg: Config = serde_saphyr::from_slice_with_options(bytes, options).unwrap();
1457/// assert_eq!(cfg.retries, 5);
1458/// ```
1459pub fn from_slice_with_options<T: DeserializeOwned>(
1460 bytes: &[u8],
1461 options: Options,
1462) -> Result<T, Error> {
1463 let s = std::str::from_utf8(bytes).map_err(|_| Error::msg("input is not valid UTF-8"))?;
1464 from_str_with_options(s, options)
1465}
1466
1467/// Deserialize multiple YAML documents from a UTF-8 byte slice into a vector of `T`.
1468///
1469/// Example: read two `Config` documents separated by `---` from bytes.
1470///
1471/// ```rust
1472/// use serde::Deserialize;
1473///
1474/// #[derive(Debug, Deserialize, PartialEq)]
1475/// struct Config {
1476/// name: String,
1477/// enabled: bool,
1478/// retries: i32,
1479/// }
1480///
1481/// let yaml = r#"
1482/// name: First
1483/// enabled: true
1484/// retries: 1
1485/// ---
1486/// name: Second
1487/// enabled: false
1488/// retries: 2
1489/// "#;
1490/// let bytes = yaml.as_bytes();
1491/// let cfgs: Vec<Config> = serde_saphyr::from_slice_multiple(bytes).unwrap();
1492/// assert_eq!(cfgs.len(), 2);
1493/// assert_eq!(cfgs[0].name, "First");
1494/// ```
1495pub fn from_slice_multiple<T: DeserializeOwned>(bytes: &[u8]) -> Result<Vec<T>, Error> {
1496 from_slice_multiple_with_options(bytes, Options::default())
1497}
1498
1499/// Deserialize multiple YAML documents from bytes with configurable [`Options`].
/// Completely empty documents are ignored and not included in the returned vector.
1501///
1502/// Example: two `Config` documents with a custom budget from bytes.
1503///
1504/// ```rust
1505/// use serde::Deserialize;
1506/// use serde_saphyr::DuplicateKeyPolicy;
1507///
1508/// #[derive(Debug, Deserialize, PartialEq)]
1509/// struct Config {
1510/// name: String,
1511/// enabled: bool,
1512/// retries: i32,
1513/// }
1514///
1515/// let yaml = r#"
1516/// name: First
1517/// enabled: true
1518/// retries: 1
1519/// ---
1520/// name: Second
1521/// enabled: false
1522/// retries: 2
1523/// "#;
1524/// let bytes = yaml.as_bytes();
1525/// let options = serde_saphyr::options! {
1526/// budget: serde_saphyr::budget! {
1527/// max_anchors: 200,
1528/// },
1529/// duplicate_keys: DuplicateKeyPolicy::FirstWins,
1530/// };
1531/// let cfgs: Vec<Config> = serde_saphyr::from_slice_multiple_with_options(bytes, options).unwrap();
1532/// assert_eq!(cfgs.len(), 2);
1533/// assert!(!cfgs[1].enabled);
1534/// ```
1535pub fn from_slice_multiple_with_options<T: DeserializeOwned>(
1536 bytes: &[u8],
1537 options: Options,
1538) -> Result<Vec<T>, Error> {
1539 let s = std::str::from_utf8(bytes).map_err(|_| Error::msg("input is not valid UTF-8"))?;
1540 from_multiple_with_options(s, options)
1541}
1542
1543/// Serialize multiple documents into a YAML string.
1544///
1545/// Serializes each value in the provided slice as an individual YAML document.
1546/// Documents are separated by a standard YAML document start marker ("---\n").
1547/// No marker is emitted before the first document.
1548///
1549/// Example
1550///
1551/// ```rust
1552/// use serde::Serialize;
1553///
1554/// #[derive(Serialize)]
1555/// struct Point { x: i32 }
1556///
1557/// let docs = vec![Point { x: 1 }, Point { x: 2 }];
1558/// let out = serde_saphyr::to_string_multiple(&docs).unwrap();
1559/// assert_eq!(out, "x: 1\n---\nx: 2\n");
1560/// ```
1561pub fn to_string_multiple<T: serde::Serialize>(
1562 values: &[T],
1563) -> std::result::Result<String, crate::ser::Error> {
1564 let mut out = String::new();
1565 let mut first = true;
1566 for v in values {
1567 if !first {
1568 out.push_str("---\n");
1569 }
1570 first = false;
1571 to_fmt_writer(&mut out, v)?;
1572 }
1573 Ok(out)
1574}
1575
1576/// Deserialize a single YAML document from any `std::io::Read`.
1577///
1578///
/// This method parses as it reads, without loading the entire input into memory first. Hence,
/// budget limits protect against large (potentially malicious) input.
1581///
1582/// Example
1583///
1584/// ```rust
1585/// use serde::{Deserialize, Serialize};
1586/// use std::collections::HashMap;
1587/// use serde_json::Value;
1588///
1589/// #[derive(Debug, PartialEq, Serialize, Deserialize)]
1590/// struct Point {
1591/// x: i32,
1592/// y: i32,
1593/// }
1594///
1595/// let yaml = "x: 3\ny: 4\n";
1596/// let reader = std::io::Cursor::new(yaml.as_bytes());
1597/// let p: Point = serde_saphyr::from_reader(reader).unwrap();
1598/// assert_eq!(p, Point { x: 3, y: 4 });
1599///
1600/// // It also works for dynamic values like serde_json::Value
1601/// let mut big = String::new();
1602/// let mut i = 0usize;
1603/// while big.len() < 64 * 1024 { big.push_str(&format!("k{0}: v{0}\n", i)); i += 1; }
1604/// let reader = std::io::Cursor::new(big.as_bytes().to_owned());
1605/// let _value: Value = serde_saphyr::from_reader(reader).unwrap();
1606/// ```
1607pub fn from_reader<'a, R: std::io::Read + 'a, T: DeserializeOwned>(reader: R) -> Result<T, Error> {
1608 from_reader_with_options(reader, Options::default())
1609}
1610
1611/// Deserialize a single YAML document from any `std::io::Read` with configurable `Options`.
1612///
1613/// This is the reader-based counterpart to [`from_str_with_options`]. It consumes a
1614/// byte-oriented reader, decodes it to UTF-8, and streams events into the deserializer.
1615///
/// This method parses as it reads, without loading the entire input into memory first. Hence,
/// budget limits protect against large (potentially malicious) input.
1618///
1619/// Notes on limits and large inputs
1620/// - Parsing limits: Use [`Options::budget`] to constrain YAML complexity (events, nodes,
1621/// nesting depth, total scalar bytes, number of documents, anchors, aliases, etc.). These
1622/// limits are enforced during parsing and are enabled by default via `Options::default()`.
/// - Byte-level input cap: A hard cap on input bytes is enforced via `Options::budget.max_reader_input_bytes`.
1624/// The default budget sets this to 256 MiB. You can override it by customizing `Options::budget`.
1625/// When the cap is exceeded, deserialization fails early with a budget error.
1626///
1627/// Example: limit raw input bytes and customize options
1628/// ```rust
1629/// use std::io::{Read, Cursor};
1630/// use serde::Deserialize;
1631/// use serde_saphyr::{Budget, Options};
1632///
1633/// #[derive(Debug, Deserialize, PartialEq)]
1634/// struct Point { x: i32, y: i32 }
1635///
1636/// let yaml = "x: 3\ny: 4\n";
1637/// let reader = Cursor::new(yaml.as_bytes());
1638///
1639/// let opts = serde_saphyr::options! {
1640/// budget: serde_saphyr::budget! {
1641/// max_events: 10_000,
1642/// max_reader_input_bytes: Some(1024),
1643/// },
1644/// };
1645///
1646/// let p: Point = serde_saphyr::from_reader_with_options(reader, opts).unwrap();
1647/// assert_eq!(p, Point { x: 3, y: 4 });
1648/// ```
1649///
1650/// Error behavior
1651/// - If an empty document is provided (no content), a type-mismatch (eof) error is returned when
1652/// attempting to deserialize into non-null-like targets.
1653/// - If the reader contains multiple documents, an error is returned suggesting the
1654/// `read`/`read_with_options` iterator APIs.
1655/// - If `Options::budget` is set and a limit is exceeded, an error is returned early.
1656#[allow(deprecated)]
1657pub fn from_reader_with_options<'a, R: std::io::Read + 'a, T: DeserializeOwned>(
1658 reader: R,
1659 options: Options,
1660) -> Result<T, Error> {
1661 let cfg = crate::de::Cfg::from_options(&options);
1662 let crop_radius = options.crop_radius;
1663
1664 // Wrap the reader in a SharedRingReader to capture context for error snippets
1665 let shared_ring = ring_reader::SharedRingReader::new(reader);
1666 let ring_handle = ring_reader::SharedRingReaderHandle::new(&shared_ring);
1667
1668 let mut src = LiveEvents::from_reader(
1669 ring_handle,
1670 options.budget,
1671 options.budget_report,
1672 options.budget_report_cb,
1673 options.alias_limits,
1674 false,
1675 EnforcingPolicy::AllContent,
1676 );
1677
1678 // Helper to attach snippet to an error using the RingReader's context
1679 let attach_snippet = |e: Error| -> Error {
1680 if crop_radius == 0 {
1681 return e;
1682 }
1683 match shared_ring.get_recent() {
1684 Ok(snapshot) => {
1685 let text = String::from_utf8_lossy(&snapshot.bytes);
1686 e.with_snippet_offset(&text, snapshot.start_line, crop_radius)
1687 }
1688 Err(_) => e, // If we can't get the snapshot, return the error as-is
1689 }
1690 };
1691
1692 let value_res = crate::anchor_store::with_document_scope(|| {
1693 T::deserialize(crate::de::YamlDeserializer::new(&mut src, cfg))
1694 });
1695 let value = match value_res {
1696 Ok(v) => v,
1697 Err(e) => {
1698 if src.synthesized_null_emitted() {
1699 // If the only thing in the input was an empty document (synthetic null),
1700 // surface this as an EOF error to preserve expected error semantics
1701 // for incompatible target types (e.g., bool).
1702 return Err(attach_snippet(
1703 Error::eof().with_location(src.last_location()),
1704 ));
1705 } else {
1706 return Err(attach_snippet(e));
1707 }
1708 }
1709 };
1710
1711 // After finishing first document, peek ahead to detect either another document/content
1712 // or trailing garbage. If a scan error occurs but we have seen a DocumentEnd ("..."),
1713 // ignore the trailing garbage. Otherwise, surface the error.
1714 match src.peek() {
1715 Ok(Some(_)) => {
1716 return Err(attach_snippet(Error::msg(
1717 "multiple YAML documents detected; use read or read_with_options to obtain the iterator",
1718 )
1719 .with_location(src.last_location())));
1720 }
1721 Ok(None) => {}
1722 Err(e) => {
1723 if src.seen_doc_end() {
1724 // Trailing garbage after a proper document end marker is ignored.
1725 } else {
1726 return Err(attach_snippet(e));
1727 }
1728 }
1729 }
1730
1731 if let Err(e) = src.finish() {
1732 return Err(attach_snippet(e));
1733 }
1734 Ok(value)
1735}
1736
1737/// Create an iterator over YAML documents from any `std::io::Read` using default options.
1738///
1739/// This is a convenience wrapper around [`read_with_options`] that uses the
1740/// same defaults as [`Options::default`] **except** it disables the
1741/// `max_reader_input_bytes` budget to better support long-lived streams.
1742///
1743/// - It streams the reader without loading the whole input into memory.
1744/// - Each item produced by the returned iterator is one deserialized YAML document of type `T`.
1745/// - Documents that are completely empty or null-like (e.g., `"", ~, null`) are skipped.
1746///
1747/// Generic parameters
1748/// - `R`: the concrete reader type implementing [`std::io::Read`]. You almost never need to
1749/// write this explicitly; the compiler will infer it from the `reader` you pass. When using
1750/// turbofish, write `_` to let the compiler infer `R`.
1751/// - `T`: the type to deserialize each YAML document into. Must implement [`serde::de::DeserializeOwned`].
1752///
1753/// Lifetimes
1754/// - `'a`: the lifetime of the returned iterator, tied to the lifetime of the provided `reader`.
1755/// The iterator cannot outlive the reader it was created from.
1756///
1757/// Limits and budget
1758/// - Uses the same limits as `Options::default()` (events, nodes, nesting depth, total scalar
1759/// bytes) and the default alias-replay caps. The only change is that
1760/// `Budget::max_reader_input_bytes` is set to `None` so the streaming iterator can handle
1761/// arbitrarily long inputs. To customize these limits, call [`read_with_options`] and set
1762/// `Options::budget.max_reader_input_bytes` in the provided `Options`.
1763/// - Alias replay limits are also enforced with their default values to mitigate alias bombs.
1764///
1765/// ```rust
1766/// use serde::Deserialize;
1767///
1768/// #[derive(Debug, Deserialize, PartialEq)]
1769/// struct Simple { id: usize }
1770///
1771/// let yaml = b"id: 1\n---\nid: 2\n";
1772/// let mut reader = std::io::Cursor::new(&yaml[..]);
1773///
1774/// // Type `T` is inferred from the collection target (Vec<Simple>).
1775/// let values: Vec<Simple> = serde_saphyr::read(&mut reader)
1776/// .map(|r| r.unwrap())
1777/// .collect();
1778/// assert_eq!(values.len(), 2);
1779/// assert_eq!(values[0].id, 1);
1780/// ```
1781///
1782/// Specifying only `T` with turbofish and letting `R` be inferred using `_`:
1783/// ```rust
1784/// use serde::Deserialize;
1785///
1786/// #[derive(Debug, Deserialize, PartialEq)]
1787/// struct Simple { id: usize }
1788///
1789/// let yaml = b"id: 10\n---\nid: 20\n";
1790/// let mut reader = std::io::Cursor::new(&yaml[..]);
1791///
1792/// // First turbofish parameter is R (reader type), `_` lets the compiler infer it.
1793/// let iter = serde_saphyr::read::<_, Simple>(&mut reader);
1794/// let ids: Vec<usize> = iter.map(|res| res.unwrap().id).collect();
1795/// assert_eq!(ids, vec![10, 20]);
1796/// ```
1797///
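/// - Each `next()` yields either `Ok(T)` for a successfully deserialized document or `Err(Error)`
///   if parsing fails or a limit is exceeded. After an error inside a document the iterator
///   tries to skip to the next document and continue; if no further document can be reached,
///   or the error occurs while looking for the next document, the iterator ends.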
1800/// - Empty/null-like documents are skipped and produce no items.
1801///
1802/// *Note* Some content of the next document is read before the current parsed document is emitted.
1803/// Hence, while streaming is good for safely parsing large files with multiple documents without
1804/// loading it into RAM in advance, it does not emit each document exactly
1805/// after `---` is encountered.
1806pub fn read<'a, R, T>(reader: &'a mut R) -> Box<dyn Iterator<Item = Result<T, Error>> + 'a>
1807where
1808 R: Read + 'a,
1809 T: DeserializeOwned + 'a,
1810{
1811 Box::new(read_with_options(
1812 reader,
1813 crate::options! {
1814 budget: crate::budget! {
1815 max_reader_input_bytes: None,
1816 },
1817 },
1818 ))
1819}
1820
1821/// Create an iterator over YAML documents from any `std::io::Read`, with configurable options.
1822///
1823/// This is the multi-document counterpart to [`from_reader_with_options`]. It does not load
1824/// the entire input into memory. Instead, it streams the reader, deserializing one document
1825/// at a time into values of type `T`, yielding them through the returned iterator. Documents
1826/// that are completely empty or null-like (e.g., `""`, `~`, or `null`) are skipped.
1827///
1828/// Generic parameters
1829/// - `R`: the concrete reader type that implements [`std::io::Read`]. You rarely need to spell
1830/// this out; it is almost always inferred from the `reader` value you pass in. When using
1831/// turbofish, you can write `_` for this parameter to let the compiler infer it.
1832/// - `T`: the type to deserialize each YAML document into. This must implement [`serde::de::DeserializeOwned`].
1833///
1834/// Lifetimes
1835/// - `'a`: the lifetime of the returned iterator. It is tied to the lifetime of the provided
1836/// `reader` value because the iterator borrows internal state that references the reader.
1837/// In practice, this means the iterator cannot outlive the reader it was created from.
1838///
1839/// Limits and budget
1840/// - All parsing limits configured via [`Options::budget`] (such as maximum events, nodes,
///   nesting depth, total scalar bytes) are enforced while streaming. A hard input-byte cap
///   is also enforced via `Budget::max_reader_input_bytes` (256 MiB by default); set this
///   to `None` if you need the streamer to run for an arbitrarily long time.
1844/// - Alias replay limits from [`Options::alias_limits`] are also enforced to mitigate alias bombs.
1845///
1846/// ```rust
1847/// use serde::Deserialize;
1848///
1849/// #[derive(Debug, Deserialize, PartialEq)]
1850/// struct Simple { id: usize }
1851///
1852/// let yaml = b"id: 1\n---\nid: 2\n";
1853/// let mut reader = std::io::Cursor::new(&yaml[..]);
1854///
1855/// // Type `T` is inferred from the collection target (Vec<Simple>).
1856/// let values: Vec<Simple> = serde_saphyr::read(&mut reader)
1857/// .map(|r| r.unwrap())
1858/// .collect();
1859/// assert_eq!(values.len(), 2);
1860/// assert_eq!(values[0].id, 1);
1861/// ```
1862///
1863/// Specifying only `T` with turbofish and letting `R` be inferred using `_`:
1864/// ```rust
1865/// use serde::Deserialize;
1866///
1867/// #[derive(Debug, Deserialize, PartialEq)]
1868/// struct Simple { id: usize }
1869///
1870/// let yaml = b"id: 10\n---\nid: 20\n";
1871/// let mut reader = std::io::Cursor::new(&yaml[..]);
1872///
1873/// // First turbofish parameter is R (reader type) which we let the compiler infer via `_`.
1874/// let iter = serde_saphyr::read_with_options::<_, Simple>(&mut reader, serde_saphyr::Options::default());
1875/// let ids: Vec<usize> = iter.map(|res| res.unwrap().id).collect();
1876/// assert_eq!(ids, vec![10, 20]);
1877/// ```
1878///
1879/// - Each `next()` yields either `Ok(T)` for a successfully deserialized document or `Err(Error)`
1880/// if parsing or deserialization fails.
1881/// - After a **deserialization error** (e.g., type mismatch, missing field), the iterator
1882/// automatically recovers by skipping to the next document boundary (`---`) and continues
1883/// iteration. This allows processing subsequent valid documents even when some fail.
1884/// - After a **syntax error** or **budget/alias limit exceeded**, the iterator ends because
1885/// the parser state may be unrecoverable.
1886/// - Empty/null-like documents are skipped and produce no items.
1887#[allow(deprecated)]
1888pub fn read_with_options<'a, R, T>(
1889 reader: &'a mut R, // iterator must not outlive this borrow
1890 options: Options,
1891) -> impl Iterator<Item = Result<T, Error>> + 'a
1892where
1893 R: Read + 'a,
1894 T: DeserializeOwned + 'a,
1895{
1896 struct ReadIter<'a, T> {
1897 src: LiveEvents<'a>, // borrows from `reader`
1898 cfg: crate::de::Cfg,
1899 finished: bool,
1900 _marker: std::marker::PhantomData<T>,
1901 }
1902
1903 impl<'a, T> Iterator for ReadIter<'a, T>
1904 where
1905 T: DeserializeOwned + 'a,
1906 {
1907 type Item = Result<T, Error>;
1908
1909 fn next(&mut self) -> Option<Self::Item> {
1910 if self.finished {
1911 return None;
1912 }
1913 loop {
1914 match self.src.peek() {
1915 Ok(Some(Ev::Scalar { value, style, .. }))
1916 if scalar_is_nullish(value, style) =>
1917 {
1918 let _ = self.src.next();
1919 continue;
1920 }
1921 Ok(Some(_)) => {
1922 let res = crate::anchor_store::with_document_scope(|| {
1923 T::deserialize(crate::de::YamlDeserializer::new(
1924 &mut self.src,
1925 self.cfg,
1926 ))
1927 });
1928 if res.is_err() {
1929 // After a deserialization error, skip remaining events in the
1930 // current document and try to recover at the next document boundary.
1931 // If no next document is found, mark as finished.
1932 if !self.src.skip_to_next_document() {
1933 self.finished = true;
1934 }
1935 }
1936 return Some(res);
1937 }
1938 Ok(None) => {
1939 self.finished = true;
1940 if let Err(e) = self.src.finish() {
1941 return Some(Err(e));
1942 }
1943 return None;
1944 }
1945 Err(e) => {
1946 self.finished = true;
1947 let _ = self.src.finish();
1948 return Some(Err(e));
1949 }
1950 }
1951 }
1952 }
1953 }
1954
1955 let cfg = crate::de::Cfg::from_options(&options);
1956 let src = LiveEvents::from_reader(
1957 reader,
1958 options.budget,
1959 options.budget_report,
1960 options.budget_report_cb,
1961 options.alias_limits,
1962 false,
1963 EnforcingPolicy::PerDocument,
1964 );
1965
1966 ReadIter::<T> {
1967 src,
1968 cfg,
1969 finished: false,
1970 _marker: std::marker::PhantomData,
1971 }
1972}