serde_saphyr/
lib.rs

1/// Serialization public API is defined at crate root
2pub use anchors::{ArcAnchor, ArcWeakAnchor, RcAnchor, RcWeakAnchor};
3pub use de::{Budget, DuplicateKeyPolicy, Error, Location, Options};
4pub use ser::{Commented, FlowMap, FlowSeq, FoldStr, LitStr};
5
6use crate::budget::EnforcingPolicy;
7use crate::de::{Ev, Events};
8use crate::live_events::LiveEvents;
9use crate::parse_scalars::scalar_is_nullish;
10pub use crate::serializer_options::SerializerOptions;
11use serde::de::DeserializeOwned;
12use std::io::Read;
13
14mod anchor_store;
15mod anchors;
16mod base64;
17pub mod budget;
18mod de;
19mod de_error;
20mod live_events;
21pub mod options;
22mod parse_scalars;
23mod ser;
24
25pub mod ser_error;
26
27mod serializer_options;
28mod tags;
29
30pub(crate) mod ser_quoting;
31
32#[cfg(feature = "robotics")]
33pub mod angles_conversions;
34mod buffered_input;
35// ---------------- Serialization (public API) ----------------
36
37/// Serialize a value to a YAML `String`.
38///
39/// This is the easiest entry point when you just want a YAML string.
40///
41/// Example
42///
43/// ```rust
44/// use serde::Serialize;
45///
46/// #[derive(Serialize)]
47/// struct Foo { a: i32, b: bool }
48///
49/// let s = serde_saphyr::to_string(&Foo { a: 1, b: true }).unwrap();
50/// assert!(s.contains("a: 1"));
51/// ```
52pub fn to_string<T: serde::Serialize>(value: &T) -> std::result::Result<String, crate::ser::Error> {
53    let mut out = String::new();
54    to_fmt_writer(&mut out, value)?;
55    Ok(out)
56}
57
58/// Deprecated: use `to_fmt_writer` or `to_io_writer`
59/// Kept for a transition release to avoid instant breakage.
60#[deprecated(
61    since = "0.0.7",
62    note = "Use `to_fmt_writer` for `fmt::Write` (String, fmt::Formatter) or `to_io_writer` for files/sockets."
63)]
64pub fn to_writer<W: std::fmt::Write, T: serde::Serialize>(
65    output: &mut W,
66    value: &T,
67) -> std::result::Result<(), crate::ser::Error> {
68    let mut ser = crate::ser::YamlSer::new(output);
69    value.serialize(&mut ser)
70}
71
72/// Serialize a value as YAML into any [`fmt::Write`] target.
73pub fn to_fmt_writer<W: std::fmt::Write, T: serde::Serialize>(
74    output: &mut W,
75    value: &T,
76) -> std::result::Result<(), crate::ser::Error> {
77    to_fmt_writer_with_options(output, value, SerializerOptions::default())
78}
79
80/// Serialize a value as YAML into any [`io::Write`] target.
81pub fn to_io_writer<W: std::io::Write, T: serde::Serialize>(
82    output: &mut W,
83    value: &T,
84) -> std::result::Result<(), crate::ser::Error> {
85    to_io_writer_with_options(output, value, SerializerOptions::default())
86}
87
88/// Serialize a value as YAML into any [`fmt::Write`] target, with options.
89/// Options are consumed because anchor generator may be taken from them.
90pub fn to_fmt_writer_with_options<W: std::fmt::Write, T: serde::Serialize>(
91    output: &mut W,
92    value: &T,
93    mut options: SerializerOptions,
94) -> std::result::Result<(), crate::ser::Error> {
95    let mut ser = crate::ser::YamlSer::with_options(output, &mut options);
96    value.serialize(&mut ser)
97}
98
99/// Serialize a value as YAML into any [`io::Write`] target, with options.
100/// Options are consumed because anchor generator may be taken from them.
101pub fn to_io_writer_with_options<W: std::io::Write, T: serde::Serialize>(
102    output: &mut W,
103    value: &T,
104    mut options: SerializerOptions,
105) -> std::result::Result<(), crate::ser::Error> {
106    struct Adapter<'a, W: std::io::Write> {
107        output: &'a mut W,
108        last_err: Option<std::io::Error>,
109    }
110    impl<'a, W: std::io::Write> std::fmt::Write for Adapter<'a, W> {
111        fn write_str(&mut self, s: &str) -> std::fmt::Result {
112            if let Err(e) = self.output.write_all(s.as_bytes()) {
113                self.last_err = Some(e);
114                return Err(std::fmt::Error);
115            }
116            Ok(())
117        }
118        fn write_char(&mut self, c: char) -> std::fmt::Result {
119            let mut buf = [0u8; 4];
120            let s = c.encode_utf8(&mut buf);
121            self.write_str(s)
122        }
123    }
124    let mut adapter = Adapter {
125        output: output,
126        last_err: None,
127    };
128    let mut ser = crate::ser::YamlSer::with_options(&mut adapter, &mut options);
129    match value.serialize(&mut ser) {
130        Ok(()) => Ok(()),
131        Err(e) => {
132            if let Some(io_error) = adapter.last_err.take() {
133                return Err(crate::ser::Error::from(io_error));
134            }
135            Err(e)
136        }
137    }
138}
139
140/// Deprecated: use `to_fmt_writer_with_options` for `fmt::Write` or `to_io_writer_with_options` for `io::Write`.
141#[deprecated(
142    since = "0.0.7",
143    note = "Use `to_fmt_writer_with_options` for fmt::Write or `to_io_writer_with_options` for io::Write."
144)]
145pub fn to_writer_with_options<W: std::fmt::Write, T: serde::Serialize>(
146    output: &mut W,
147    value: &T,
148    options: SerializerOptions,
149) -> std::result::Result<(), crate::ser::Error> {
150    to_fmt_writer_with_options(output, value, options)
151}
152
153/// Deserialize any `T: serde::de::DeserializeOwned` directly from a YAML string.
154///
155/// This is the simplest entry point; it parses a single YAML document. If the
156/// input contains multiple documents, this returns an error advising to use
157/// [`from_multiple`] or [`from_multiple_with_options`].
158///
159/// Example: read a small `Config` structure from a YAML string.
160///
161/// ```rust
162/// use serde::Deserialize;
163///
164/// #[derive(Debug, Deserialize, PartialEq)]
165/// struct Config {
166///     name: String,
167///     enabled: bool,
168///     retries: i32,
169/// }
170///
171/// let yaml = r#"
172///     name: My Application
173///     enabled: true
174///     retries: 5
175/// "#;
176///
177/// let cfg: Config = serde_saphyr::from_str(yaml).unwrap();
178/// assert!(cfg.enabled);
179/// ```
180pub fn from_str<T: DeserializeOwned>(input: &str) -> Result<T, Error> {
181    from_str_with_options(input, Options::default())
182}
183
184/// Deserialize a single YAML document with configurable [`Options`].
185///
186/// Example: read a small `Config` with a custom budget and default duplicate-key policy.
187///
188/// ```rust
189/// use serde::Deserialize;
190/// use serde_saphyr::DuplicateKeyPolicy;
191///
192/// #[derive(Debug, Deserialize, PartialEq)]
193/// struct Config {
194///     name: String,
195///     enabled: bool,
196///     retries: i32,
197/// }
198///
199/// let yaml = r#"
200///      name: My Application
201///      enabled: true
202///      retries: 5
203/// "#;
204///
205/// let options = serde_saphyr::Options {
206///      budget: Some(serde_saphyr::Budget {
207///            max_anchors: 200,
208///            .. serde_saphyr::Budget::default()
209///      }),
210///     duplicate_keys: DuplicateKeyPolicy::FirstWins,
211///     .. serde_saphyr::Options::default()
212/// };
213/// let cfg: Config = serde_saphyr::from_str_with_options(yaml, options).unwrap();
214/// assert_eq!(cfg.retries, 5);
215/// ```
216pub fn from_str_with_options<T: DeserializeOwned>(
217    input: &str,
218    options: Options,
219) -> Result<T, Error> {
220    // Normalize: ignore a single leading UTF-8 BOM if present.
221    let input = if let Some(rest) = input.strip_prefix('\u{FEFF}') {
222        rest
223    } else {
224        input
225    };
226
227    let cfg = crate::de::Cfg::from_options(&options);
228    // Do not stop at DocumentEnd; we'll probe for trailing content/errors explicitly.
229    let mut src = LiveEvents::from_str(input, options.budget, options.alias_limits, false);
230    let value_res = crate::anchor_store::with_document_scope(|| {
231        T::deserialize(crate::de::Deser::new(&mut src, cfg))
232    });
233    let value = match value_res {
234        Ok(v) => v,
235        Err(e) => {
236            if src.synthesized_null_emitted() {
237                // If the only thing in the input was an empty document (synthetic null),
238                // surface this as an EOF error to preserve expected error semantics
239                // for incompatible target types (e.g., bool).
240                return Err(Error::eof().with_location(src.last_location()));
241            } else {
242                return Err(e);
243            }
244        }
245    };
246
247    // After finishing first document, peek ahead to detect either another document/content
248    // or trailing garbage. If a scan error occurs but we have seen a DocumentEnd ("..."),
249    // ignore the trailing garbage. Otherwise, surface the error.
250    match src.peek() {
251        Ok(Some(_)) => {
252            return Err(Error::msg(
253                "multiple YAML documents detected; use from_multiple or from_multiple_with_options",
254            )
255            .with_location(src.last_location()));
256        }
257        Ok(None) => {}
258        Err(e) => {
259            if src.seen_doc_end() {
260                // Trailing garbage after a proper document end marker is ignored.
261            } else {
262                return Err(e);
263            }
264        }
265    }
266
267    src.finish()?;
268    Ok(value)
269}
270
271/// Deserialize multiple YAML documents from a single string into a vector of `T`.
272/// Completely empty documents are ignored and not included into returned vector.
273///
274/// Example: read two `Config` documents separated by `---`.
275///
276/// ```rust
277/// use serde::Deserialize;
278///
279/// #[derive(Debug, Deserialize, PartialEq)]
280/// struct Config {
281///     name: String,
282///     enabled: bool,
283///     retries: i32,
284/// }
285///
286/// let yaml = r#"
287/// name: First
288/// enabled: true
289/// retries: 1
290/// ---
291/// name: Second
292/// enabled: false
293/// retries: 2
294/// "#;
295///
296/// let cfgs: Vec<Config> = serde_saphyr::from_multiple(yaml).unwrap();
297/// assert_eq!(cfgs.len(), 2);
298/// assert_eq!(cfgs[0].name, "First");
299/// ```
300pub fn from_multiple<T: DeserializeOwned>(input: &str) -> Result<Vec<T>, Error> {
301    from_multiple_with_options(input, Options::default())
302}
303
304/// Deserialize multiple YAML documents into a vector with configurable [`Options`].
305///
306/// Example: two `Config` documents with a custom budget.
307///
308/// ```rust
309/// use serde::Deserialize;
310/// use serde_saphyr::DuplicateKeyPolicy;
311///
312/// #[derive(Debug, Deserialize, PartialEq)]
313/// struct Config {
314///     name: String,
315///     enabled: bool,
316///     retries: i32,
317/// }
318///
319/// let yaml = r#"
320/// name: First
321/// enabled: true
322/// retries: 1
323/// ---
324/// name: Second
325/// enabled: false
326/// retries: 2
327/// "#;
328///
329/// let options = serde_saphyr::Options {
330///      budget: Some(serde_saphyr::Budget {
331///            max_anchors: 200,
332///            .. serde_saphyr::Budget::default()
333///      }),
334///     duplicate_keys: DuplicateKeyPolicy::FirstWins,
335///     .. serde_saphyr::Options::default()
336/// };
337/// let cfgs: Vec<Config> = serde_saphyr::from_multiple_with_options(yaml, options).unwrap();
338/// assert_eq!(cfgs.len(), 2);
339/// assert!(!cfgs[1].enabled);
340/// ```
341pub fn from_multiple_with_options<T: DeserializeOwned>(
342    input: &str,
343    options: Options,
344) -> Result<Vec<T>, Error> {
345    // Normalize: ignore a single leading UTF-8 BOM if present.
346    let input = if let Some(rest) = input.strip_prefix('\u{FEFF}') {
347        rest
348    } else {
349        input
350    };
351    let cfg = crate::de::Cfg::from_options(&options);
352    let mut src = LiveEvents::from_str(input, options.budget, options.alias_limits, false);
353    let mut values = Vec::new();
354
355    loop {
356        match src.peek()? {
357            // Skip documents that are explicit null-like scalars ("", "~", or "null").
358            Some(Ev::Scalar {
359                value: s, style, ..
360            }) if scalar_is_nullish(s, style) => {
361                let _ = src.next()?; // consume the null scalar document
362                // Do not push anything for this document; move to the next one.
363                continue;
364            }
365            Some(_) => {
366                let value = crate::anchor_store::with_document_scope(|| {
367                    T::deserialize(crate::de::Deser::new(&mut src, cfg))
368                })?;
369                values.push(value);
370            }
371            None => break,
372        }
373    }
374
375    src.finish()?;
376    Ok(values)
377}
378
379/// Deserialize a single YAML document from a UTF-8 byte slice.
380///
381/// This is equivalent to [`from_str`], but accepts `&[u8]` and validates it is
382/// valid UTF-8 before parsing.
383///
384/// Example: read a small `Config` structure from bytes.
385///
386/// ```rust
387/// use serde::Deserialize;
388///
389/// #[derive(Debug, Deserialize, PartialEq)]
390/// struct Config {
391///     name: String,
392///     enabled: bool,
393///     retries: i32,
394/// }
395///
396/// let yaml = r#"
397/// name: My Application
398/// enabled: true
399/// retries: 5
400/// "#;
401/// let bytes = yaml.as_bytes();
402/// let cfg: Config = serde_saphyr::from_slice(bytes).unwrap();
403/// assert!(cfg.enabled);
404/// ```
405///
406pub fn from_slice<T: DeserializeOwned>(bytes: &[u8]) -> Result<T, Error> {
407    from_slice_with_options(bytes, Options::default())
408}
409
410/// Deserialize a single YAML document from a UTF-8 byte slice with configurable [`Options`].
411///
412/// Example: read a small `Config` with a custom budget from bytes.
413///
414/// ```rust
415/// use serde::Deserialize;
416/// use serde_saphyr::DuplicateKeyPolicy;
417///
418/// #[derive(Debug, Deserialize, PartialEq)]
419/// struct Config {
420///     name: String,
421///     enabled: bool,
422///     retries: i32,
423/// }
424///
425/// let yaml = r#"
426///      name: My Application
427///      enabled: true
428///      retries: 5
429/// "#;
430/// let bytes = yaml.as_bytes();
431/// let options = serde_saphyr::Options {
432///      budget: Some(serde_saphyr::Budget {
433///            max_anchors: 200,
434///            .. serde_saphyr::Budget::default()
435///      }),
436///     duplicate_keys: DuplicateKeyPolicy::FirstWins,
437///     .. serde_saphyr::Options::default()
438/// };
439/// let cfg: Config = serde_saphyr::from_slice_with_options(bytes, options).unwrap();
440/// assert_eq!(cfg.retries, 5);
441/// ```
442pub fn from_slice_with_options<T: DeserializeOwned>(
443    bytes: &[u8],
444    options: Options,
445) -> Result<T, Error> {
446    let s = std::str::from_utf8(bytes).map_err(|_| Error::msg("input is not valid UTF-8"))?;
447    from_str_with_options(s, options)
448}
449
450/// Deserialize multiple YAML documents from a UTF-8 byte slice into a vector of `T`.
451///
452/// Example: read two `Config` documents separated by `---` from bytes.
453///
454/// ```rust
455/// use serde::Deserialize;
456///
457/// #[derive(Debug, Deserialize, PartialEq)]
458/// struct Config {
459///     name: String,
460///     enabled: bool,
461///     retries: i32,
462/// }
463///
464/// let yaml = r#"
465/// name: First
466/// enabled: true
467/// retries: 1
468/// ---
469/// name: Second
470/// enabled: false
471/// retries: 2
472/// "#;
473/// let bytes = yaml.as_bytes();
474/// let cfgs: Vec<Config> = serde_saphyr::from_slice_multiple(bytes).unwrap();
475/// assert_eq!(cfgs.len(), 2);
476/// assert_eq!(cfgs[0].name, "First");
477/// ```
478pub fn from_slice_multiple<T: DeserializeOwned>(bytes: &[u8]) -> Result<Vec<T>, Error> {
479    from_slice_multiple_with_options(bytes, Options::default())
480}
481
482/// Deserialize multiple YAML documents from bytes with configurable [`Options`].
483/// Completely empty documents are ignored and not included into returned vector.
484///
485/// Example: two `Config` documents with a custom budget from bytes.
486///
487/// ```rust
488/// use serde::Deserialize;
489/// use serde_saphyr::DuplicateKeyPolicy;
490///
491/// #[derive(Debug, Deserialize, PartialEq)]
492/// struct Config {
493///     name: String,
494///     enabled: bool,
495///     retries: i32,
496/// }
497///
498/// let yaml = r#"
499/// name: First
500/// enabled: true
501/// retries: 1
502/// ---
503/// name: Second
504/// enabled: false
505/// retries: 2
506/// "#;
507/// let bytes = yaml.as_bytes();
508/// let options = serde_saphyr::Options {
509///      budget: Some(serde_saphyr::Budget {
510///            max_anchors: 200,
511///            .. serde_saphyr::Budget::default()
512///      }),
513///     duplicate_keys: DuplicateKeyPolicy::FirstWins,
514///     .. serde_saphyr::Options::default()
515/// };
516/// let cfgs: Vec<Config> = serde_saphyr::from_slice_multiple_with_options(bytes, options).unwrap();
517/// assert_eq!(cfgs.len(), 2);
518/// assert!(!cfgs[1].enabled);
519/// ```
520pub fn from_slice_multiple_with_options<T: DeserializeOwned>(
521    bytes: &[u8],
522    options: Options,
523) -> Result<Vec<T>, Error> {
524    let s = std::str::from_utf8(bytes).map_err(|_| Error::msg("input is not valid UTF-8"))?;
525    from_multiple_with_options(s, options)
526}
527
528/// Serialize multiple documents into a YAML string.
529///
530/// Serializes each value in the provided slice as an individual YAML document.
531/// Documents are separated by a standard YAML document start marker ("---\n").
532/// No marker is emitted before the first document.
533///
534/// Example
535///
536/// ```rust
537/// use serde::Serialize;
538///
539/// #[derive(Serialize)]
540/// struct Point { x: i32 }
541///
542/// let docs = vec![Point { x: 1 }, Point { x: 2 }];
543/// let out = serde_saphyr::to_string_multiple(&docs).unwrap();
544/// assert_eq!(out, "x: 1\n---\nx: 2\n");
545/// ```
546pub fn to_string_multiple<T: serde::Serialize>(
547    values: &[T],
548) -> std::result::Result<String, crate::ser::Error> {
549    let mut out = String::new();
550    let mut first = true;
551    for v in values {
552        if !first {
553            out.push_str("---\n");
554        }
555        first = false;
556        to_fmt_writer(&mut out, v)?;
557    }
558    Ok(out)
559}
560
561/// Deserialize a single YAML document from any `std::io::Read`.
562///
563/// The entire reader is read into memory (buffered) and then deserialized
564/// using the same logic as [`from_slice`]. This function is convenient when
565/// your YAML input comes from a file or any other IO stream.
566///
567/// Example
568///
569/// ```rust
570/// use serde::{Deserialize, Serialize};
571/// use std::collections::HashMap;
572/// use serde_json::Value;
573///
574/// #[derive(Debug, PartialEq, Serialize, Deserialize)]
575/// struct Point {
576///     x: i32,
577///     y: i32,
578/// }
579///
580/// let yaml = "x: 3\ny: 4\n";
581/// let reader = std::io::Cursor::new(yaml.as_bytes());
582/// let p: Point = serde_saphyr::from_reader(reader).unwrap();
583/// assert_eq!(p, Point { x: 3, y: 4 });
584///
585/// // It also works for dynamic values like serde_json::Value
586/// let mut big = String::new();
587/// let mut i = 0usize;
588/// while big.len() < 64 * 1024 { big.push_str(&format!("k{0}: v{0}\n", i)); i += 1; }
589/// let reader = std::io::Cursor::new(big.as_bytes().to_owned());
590/// let _value: Value = serde_saphyr::from_reader(reader).unwrap();
591/// ```
592pub fn from_reader<'a, R: std::io::Read + 'a, T: DeserializeOwned>(reader: R) -> Result<T, Error> {
593    from_reader_with_options(reader, Options::default())
594}
595
596/// Deserialize a single YAML document from any `std::io::Read` with configurable `Options`.
597///
598/// This is the reader-based counterpart to [`from_str_with_options`]. It consumes a
599/// byte-oriented reader, decodes it to UTF-8, and streams events into the deserializer.
600///
601/// Notes on limits and large inputs
602/// - Parsing limits: Use [`Options::budget`] to constrain YAML complexity (events, nodes,
603///   nesting depth, total scalar bytes, number of documents, anchors, aliases, etc.). These
604///   limits are enforced during parsing and are enabled by default via `Options::default()`.
605/// - Byte-level input cap: A hard cap on input bytes is enforced via `Options::budget.max_reader_input_bytes`.
606///   The default budget sets this to 256 MiB. You can override it by customizing `Options::budget`.
607///   When the cap is exceeded, deserialization fails early with a budget error.
608///
609/// Example: limit raw input bytes and customize options
610/// ```rust
611/// use std::io::{Read, Cursor};
612/// use serde::Deserialize;
613/// use serde_saphyr::{Budget, Options};
614///
615/// #[derive(Debug, Deserialize, PartialEq)]
616/// struct Point { x: i32, y: i32 }
617///
618/// let yaml = "x: 3\ny: 4\n";
619/// let reader = Cursor::new(yaml.as_bytes());
620///
621/// // Cap the reader to at most 1 KiB of input bytes.
622/// let capped = reader.take(1024);
623///
624/// // Tighten the parsing budget as well (optional).
625/// let mut opts = Options::default();
626/// opts.budget = Some(Budget { max_events: 10_000, ..Budget::default() });
627///
628/// let p: Point = serde_saphyr::from_reader_with_options(capped, opts).unwrap();
629/// assert_eq!(p, Point { x: 3, y: 4 });
630/// ```
631///
632/// Error behavior
633/// - If an empty document is provided (no content), a type-mismatch (eof) error is returned when
634///   attempting to deserialize into non-null-like targets.
635/// - If the reader contains multiple documents, an error is returned suggesting the
636///   `read`/`read_with_options` iterator APIs.
637/// - If `Options::budget` is set and a limit is exceeded, an error is returned early.
638pub fn from_reader_with_options<'a, R: std::io::Read + 'a, T: DeserializeOwned>(
639    reader: R,
640    options: Options,
641) -> Result<T, Error> {
642    let cfg = crate::de::Cfg::from_options(&options);
643    let mut src = LiveEvents::from_reader(
644        reader,
645        options.budget,
646        options.alias_limits,
647        false,
648        EnforcingPolicy::AllContent,
649    );
650    let value_res = crate::anchor_store::with_document_scope(|| {
651        T::deserialize(crate::de::Deser::new(&mut src, cfg))
652    });
653    let value = match value_res {
654        Ok(v) => v,
655        Err(e) => {
656            if src.synthesized_null_emitted() {
657                // If the only thing in the input was an empty document (synthetic null),
658                // surface this as an EOF error to preserve expected error semantics
659                // for incompatible target types (e.g., bool).
660                return Err(Error::eof().with_location(src.last_location()));
661            } else {
662                return Err(e);
663            }
664        }
665    };
666
667    // After finishing first document, peek ahead to detect either another document/content
668    // or trailing garbage. If a scan error occurs but we have seen a DocumentEnd ("..."),
669    // ignore the trailing garbage. Otherwise, surface the error.
670    match src.peek() {
671        Ok(Some(_)) => {
672            return Err(Error::msg(
673                "multiple YAML documents detected; use read or read_with_options to obtain the iterator",
674            )
675                .with_location(src.last_location()));
676        }
677        Ok(None) => {}
678        Err(e) => {
679            if src.seen_doc_end() {
680                // Trailing garbage after a proper document end marker is ignored.
681            } else {
682                return Err(e);
683            }
684        }
685    }
686
687    src.finish()?;
688    Ok(value)
689}
690
691/// Create an iterator over YAML documents from any `std::io::Read` using default options.
692///
693/// This is a convenience wrapper around [`read_with_options`], equivalent to
694/// `read_with_options(reader, Options::default())`.
695///
696/// - It streams the reader without loading the whole input into memory.
697/// - Each item produced by the returned iterator is one deserialized YAML document of type `T`.
698/// - Documents that are completely empty or null-like (e.g., `"", ~, null`) are skipped.
699///
700/// Generic parameters
701/// - `R`: the concrete reader type implementing [`std::io::Read`]. You almost never need to
702///   write this explicitly; the compiler will infer it from the `reader` you pass. When using
703///   turbofish, write `_` to let the compiler infer `R`.
704/// - `T`: the type to deserialize each YAML document into. Must implement [`serde::de::DeserializeOwned`].
705///
706/// Lifetimes
707/// - `'a`: the lifetime of the returned iterator, tied to the lifetime of the provided `reader`.
708///   The iterator cannot outlive the reader it was created from.
709///
710/// Limits and budget
711/// - Uses `Options::default()`, which enables a YAML parsing budget by default. This enforces
712///   limits such as maximum events, nodes, nesting depth, total scalar bytes
713///   Maximal input size limit is turned to off, as input for the streaming reader may
714///   potentially be indefinite. To customize these, call [`read_with_options`] and set
715///   `Options::budget.max_reader_input_bytes` in the provided `Options`.
716/// - Alias replay limits are also enforced with their default values to mitigate alias bombs.
717///
718/// ```rust
719/// use serde::Deserialize;
720///
721/// #[derive(Debug, Deserialize, PartialEq)]
722/// struct Simple { id: usize }
723///
724/// let yaml = b"id: 1\n---\nid: 2\n";
725/// let mut reader = std::io::Cursor::new(&yaml[..]);
726///
727/// // Type `T` is inferred from the collection target (Vec<Simple>).
728/// let values: Vec<Simple> = serde_saphyr::read(&mut reader)
729///     .map(|r| r.unwrap())
730///     .collect();
731/// assert_eq!(values.len(), 2);
732/// assert_eq!(values[0].id, 1);
733/// ```
734///
735/// Specifying only `T` with turbofish and letting `R` be inferred using `_`:
736/// ```rust
737/// use serde::Deserialize;
738///
739/// #[derive(Debug, Deserialize, PartialEq)]
740/// struct Simple { id: usize }
741///
742/// let yaml = b"id: 10\n---\nid: 20\n";
743/// let mut reader = std::io::Cursor::new(&yaml[..]);
744///
745/// // First turbofish parameter is R (reader type), `_` lets the compiler infer it.
746/// let iter = serde_saphyr::read::<_, Simple>(&mut reader);
747/// let ids: Vec<usize> = iter.map(|res| res.unwrap().id).collect();
748/// assert_eq!(ids, vec![10, 20]);
749/// ```
750///
751/// - Each `next()` yields either `Ok(T)` for a successfully deserialized document or `Err(Error)`
752///   if parsing fails or a limit is exceeded. After an error, the iterator ends.
753/// - Empty/null-like documents are skipped and produce no items.
754/// 
755/// *Note* Some content of the next document is read before the current parsed document is emitted. 
756/// Hence, while streaming is good for safely parsing large files with multiple documents without 
757/// loading it into RAM in advance, it does not emit each document exactly 
758/// after `---`  is encountered.
759pub fn read<'a, R, T>(reader: &'a mut R) -> Box<dyn Iterator<Item = Result<T, Error>> + 'a>
760where
761    R: Read + 'a,
762    T: DeserializeOwned + 'a,
763{
764    Box::new(read_with_options(
765        reader,
766        Options {
767            budget: Some(Budget {
768                max_reader_input_bytes: None,
769                ..Budget::default()
770            }),
771            ..Options::default()
772        },
773    ))
774}
775
776/// Create an iterator over YAML documents from any `std::io::Read`, with configurable options.
777///
778/// This is the multi-document counterpart to [`from_reader_with_options`]. It does not load
779/// the entire input into memory. Instead, it streams the reader, deserializing one document
780/// at a time into values of type `T`, yielding them through the returned iterator. Documents
781/// that are completely empty or null-like (e.g., `""`, `~`, or `null`) are skipped.
782///
783/// Generic parameters
784/// - `R`: the concrete reader type that implements [`std::io::Read`]. You rarely need to spell
785///   this out; it is almost always inferred from the `reader` value you pass in. When using
786///   turbofish, you can write `_` for this parameter to let the compiler infer it.
787/// - `T`: the type to deserialize each YAML document into. This must implement [`serde::de::DeserializeOwned`].
788///
789/// Lifetimes
790/// - `'a`: the lifetime of the returned iterator. It is tied to the lifetime of the provided
791///   `reader` value because the iterator borrows internal state that references the reader.
792///   In practice, this means the iterator cannot outlive the reader it was created from.
793///
794/// Limits and budget
795/// - All parsing limits configured via [`Options::budget`] (such as maximum events, nodes,
796///   nesting depth, total scalar bytes) are enforced while streaming. A hard input-byte cap
797///   is also enforced via `Budget::max_reader_input_bytes` (256 MiB by default), set this
798///   to None if you need a streamer to exist for arbitrary long time.
799/// - Alias replay limits from [`Options::alias_limits`] are also enforced to mitigate alias bombs.
800///
801/// ```rust
802/// use serde::Deserialize;
803///
804/// #[derive(Debug, Deserialize, PartialEq)]
805/// struct Simple { id: usize }
806///
807/// let yaml = b"id: 1\n---\nid: 2\n";
808/// let mut reader = std::io::Cursor::new(&yaml[..]);
809///
810/// // Type `T` is inferred from the collection target (Vec<Simple>).
811/// let values: Vec<Simple> = serde_saphyr::read(&mut reader)
812///     .map(|r| r.unwrap())
813///     .collect();
814/// assert_eq!(values.len(), 2);
815/// assert_eq!(values[0].id, 1);
816/// ```
817///
818/// Specifying only `T` with turbofish and letting `R` be inferred using `_`:
819/// ```rust
820/// use serde::Deserialize;
821///
822/// #[derive(Debug, Deserialize, PartialEq)]
823/// struct Simple { id: usize }
824///
825/// let yaml = b"id: 10\n---\nid: 20\n";
826/// let mut reader = std::io::Cursor::new(&yaml[..]);
827///
828/// // First turbofish parameter is R (reader type) which we let the compiler infer via `_`.
829/// let iter = serde_saphyr::read_with_options::<_, Simple>(&mut reader, serde_saphyr::Options::default());
830/// let ids: Vec<usize> = iter.map(|res| res.unwrap().id).collect();
831/// assert_eq!(ids, vec![10, 20]);
832/// ```
833///
834/// - Each `next()` yields either `Ok(T)` for a successfully deserialized document or `Err(Error)`
835///   if parsing fails or a budget/alias limit is exceeded. After an error, the iterator ends.
836/// - Empty/null-like documents are skipped and produce no items.
837pub fn read_with_options<'a, R, T>(
838    reader: &'a mut R, // iterator must not outlive this borrow
839    options: Options,
840) -> impl Iterator<Item = Result<T, Error>> + 'a
841where
842    R: Read + 'a,
843    T: DeserializeOwned + 'a,
844{
845    struct ReadIter<'a, T> {
846        src: LiveEvents<'a>, // borrows from `reader`
847        cfg: crate::de::Cfg,
848        finished: bool,
849        _marker: std::marker::PhantomData<T>,
850    }
851
852    impl<'a, T> Iterator for ReadIter<'a, T>
853    where
854        T: DeserializeOwned + 'a,
855    {
856        type Item = Result<T, Error>;
857
858        fn next(&mut self) -> Option<Self::Item> {
859            if self.finished {
860                return None;
861            }
862            loop {
863                match self.src.peek() {
864                    Ok(Some(Ev::Scalar { value, style, .. }))
865                        if scalar_is_nullish(value, style) =>
866                    {
867                        let _ = self.src.next();
868                        continue;
869                    }
870                    Ok(Some(_)) => {
871                        let res = crate::anchor_store::with_document_scope(|| {
872                            T::deserialize(crate::de::Deser::new(&mut self.src, self.cfg))
873                        });
874                        return Some(res);
875                    }
876                    Ok(None) => {
877                        self.finished = true;
878                        if let Err(e) = self.src.finish() {
879                            return Some(Err(e));
880                        }
881                        return None;
882                    }
883                    Err(e) => {
884                        self.finished = true;
885                        let _ = self.src.finish();
886                        return Some(Err(e));
887                    }
888                }
889            }
890        }
891    }
892
893    let cfg = crate::de::Cfg::from_options(&options);
894    let src = LiveEvents::from_reader(
895        reader,
896        options.budget,
897        options.alias_limits,
898        false,
899        EnforcingPolicy::PerDocument,
900    );
901
902    ReadIter::<T> {
903        src,
904        cfg,
905        finished: false,
906        _marker: std::marker::PhantomData,
907    }
908}