serde_saphyr/lib.rs
1/// Serialization public API is defined at crate root
2pub use anchors::{ArcAnchor, ArcWeakAnchor, RcAnchor, RcWeakAnchor};
3pub use de::{Budget, DuplicateKeyPolicy, Error, Location, Options};
4pub use ser::{Commented, FlowMap, FlowSeq, FoldStr, LitStr};
5
6use crate::budget::EnforcingPolicy;
7use crate::de::{Ev, Events};
8use crate::live_events::LiveEvents;
9use crate::parse_scalars::scalar_is_nullish;
10pub use crate::serializer_options::SerializerOptions;
11use serde::de::DeserializeOwned;
12use std::io::Read;
13
14mod anchor_store;
15mod anchors;
16mod base64;
17pub mod budget;
18mod de;
19mod de_error;
20mod live_events;
21pub mod options;
22mod parse_scalars;
23mod ser;
24
25pub mod ser_error;
26
27mod serializer_options;
28mod tags;
29
30pub(crate) mod ser_quoting;
31
32#[cfg(feature = "robotics")]
33pub mod angles_conversions;
34mod buffered_input;
35// ---------------- Serialization (public API) ----------------
36
37/// Serialize a value to a YAML `String`.
38///
39/// This is the easiest entry point when you just want a YAML string.
40///
41/// Example
42///
43/// ```rust
44/// use serde::Serialize;
45///
46/// #[derive(Serialize)]
47/// struct Foo { a: i32, b: bool }
48///
49/// let s = serde_saphyr::to_string(&Foo { a: 1, b: true }).unwrap();
50/// assert!(s.contains("a: 1"));
51/// ```
52pub fn to_string<T: serde::Serialize>(value: &T) -> std::result::Result<String, crate::ser::Error> {
53 let mut out = String::new();
54 to_fmt_writer(&mut out, value)?;
55 Ok(out)
56}
57
58/// Deprecated: use `to_fmt_writer` or `to_io_writer`
59/// Kept for a transition release to avoid instant breakage.
60#[deprecated(
61 since = "0.0.7",
62 note = "Use `to_fmt_writer` for `fmt::Write` (String, fmt::Formatter) or `to_io_writer` for files/sockets."
63)]
64pub fn to_writer<W: std::fmt::Write, T: serde::Serialize>(
65 output: &mut W,
66 value: &T,
67) -> std::result::Result<(), crate::ser::Error> {
68 let mut ser = crate::ser::YamlSer::new(output);
69 value.serialize(&mut ser)
70}
71
72/// Serialize a value as YAML into any [`fmt::Write`] target.
73pub fn to_fmt_writer<W: std::fmt::Write, T: serde::Serialize>(
74 output: &mut W,
75 value: &T,
76) -> std::result::Result<(), crate::ser::Error> {
77 to_fmt_writer_with_options(output, value, SerializerOptions::default())
78}
79
80/// Serialize a value as YAML into any [`io::Write`] target.
81pub fn to_io_writer<W: std::io::Write, T: serde::Serialize>(
82 output: &mut W,
83 value: &T,
84) -> std::result::Result<(), crate::ser::Error> {
85 to_io_writer_with_options(output, value, SerializerOptions::default())
86}
87
88/// Serialize a value as YAML into any [`fmt::Write`] target, with options.
89/// Options are consumed because anchor generator may be taken from them.
90pub fn to_fmt_writer_with_options<W: std::fmt::Write, T: serde::Serialize>(
91 output: &mut W,
92 value: &T,
93 mut options: SerializerOptions,
94) -> std::result::Result<(), crate::ser::Error> {
95 let mut ser = crate::ser::YamlSer::with_options(output, &mut options);
96 value.serialize(&mut ser)
97}
98
99/// Serialize a value as YAML into any [`io::Write`] target, with options.
100/// Options are consumed because anchor generator may be taken from them.
101pub fn to_io_writer_with_options<W: std::io::Write, T: serde::Serialize>(
102 output: &mut W,
103 value: &T,
104 mut options: SerializerOptions,
105) -> std::result::Result<(), crate::ser::Error> {
106 struct Adapter<'a, W: std::io::Write> {
107 output: &'a mut W,
108 last_err: Option<std::io::Error>,
109 }
110 impl<'a, W: std::io::Write> std::fmt::Write for Adapter<'a, W> {
111 fn write_str(&mut self, s: &str) -> std::fmt::Result {
112 if let Err(e) = self.output.write_all(s.as_bytes()) {
113 self.last_err = Some(e);
114 return Err(std::fmt::Error);
115 }
116 Ok(())
117 }
118 fn write_char(&mut self, c: char) -> std::fmt::Result {
119 let mut buf = [0u8; 4];
120 let s = c.encode_utf8(&mut buf);
121 self.write_str(s)
122 }
123 }
124 let mut adapter = Adapter {
125 output: output,
126 last_err: None,
127 };
128 let mut ser = crate::ser::YamlSer::with_options(&mut adapter, &mut options);
129 match value.serialize(&mut ser) {
130 Ok(()) => Ok(()),
131 Err(e) => {
132 if let Some(io_error) = adapter.last_err.take() {
133 return Err(crate::ser::Error::from(io_error));
134 }
135 Err(e)
136 }
137 }
138}
139
140/// Deprecated: use `to_fmt_writer_with_options` for `fmt::Write` or `to_io_writer_with_options` for `io::Write`.
141#[deprecated(
142 since = "0.0.7",
143 note = "Use `to_fmt_writer_with_options` for fmt::Write or `to_io_writer_with_options` for io::Write."
144)]
145pub fn to_writer_with_options<W: std::fmt::Write, T: serde::Serialize>(
146 output: &mut W,
147 value: &T,
148 options: SerializerOptions,
149) -> std::result::Result<(), crate::ser::Error> {
150 to_fmt_writer_with_options(output, value, options)
151}
152
153/// Deserialize any `T: serde::de::DeserializeOwned` directly from a YAML string.
154///
155/// This is the simplest entry point; it parses a single YAML document. If the
156/// input contains multiple documents, this returns an error advising to use
157/// [`from_multiple`] or [`from_multiple_with_options`].
158///
159/// Example: read a small `Config` structure from a YAML string.
160///
161/// ```rust
162/// use serde::Deserialize;
163///
164/// #[derive(Debug, Deserialize, PartialEq)]
165/// struct Config {
166/// name: String,
167/// enabled: bool,
168/// retries: i32,
169/// }
170///
171/// let yaml = r#"
172/// name: My Application
173/// enabled: true
174/// retries: 5
175/// "#;
176///
177/// let cfg: Config = serde_saphyr::from_str(yaml).unwrap();
178/// assert!(cfg.enabled);
179/// ```
180pub fn from_str<T: DeserializeOwned>(input: &str) -> Result<T, Error> {
181 from_str_with_options(input, Options::default())
182}
183
184/// Deserialize a single YAML document with configurable [`Options`].
185///
186/// Example: read a small `Config` with a custom budget and default duplicate-key policy.
187///
188/// ```rust
189/// use serde::Deserialize;
190/// use serde_saphyr::DuplicateKeyPolicy;
191///
192/// #[derive(Debug, Deserialize, PartialEq)]
193/// struct Config {
194/// name: String,
195/// enabled: bool,
196/// retries: i32,
197/// }
198///
199/// let yaml = r#"
200/// name: My Application
201/// enabled: true
202/// retries: 5
203/// "#;
204///
205/// let options = serde_saphyr::Options {
206/// budget: Some(serde_saphyr::Budget {
207/// max_anchors: 200,
208/// .. serde_saphyr::Budget::default()
209/// }),
210/// duplicate_keys: DuplicateKeyPolicy::FirstWins,
211/// .. serde_saphyr::Options::default()
212/// };
213/// let cfg: Config = serde_saphyr::from_str_with_options(yaml, options).unwrap();
214/// assert_eq!(cfg.retries, 5);
215/// ```
216pub fn from_str_with_options<T: DeserializeOwned>(
217 input: &str,
218 options: Options,
219) -> Result<T, Error> {
220 // Normalize: ignore a single leading UTF-8 BOM if present.
221 let input = if let Some(rest) = input.strip_prefix('\u{FEFF}') {
222 rest
223 } else {
224 input
225 };
226
227 let cfg = crate::de::Cfg::from_options(&options);
228 // Do not stop at DocumentEnd; we'll probe for trailing content/errors explicitly.
229 let mut src = LiveEvents::from_str(input, options.budget, options.alias_limits, false);
230 let value_res = crate::anchor_store::with_document_scope(|| {
231 T::deserialize(crate::de::Deser::new(&mut src, cfg))
232 });
233 let value = match value_res {
234 Ok(v) => v,
235 Err(e) => {
236 if src.synthesized_null_emitted() {
237 // If the only thing in the input was an empty document (synthetic null),
238 // surface this as an EOF error to preserve expected error semantics
239 // for incompatible target types (e.g., bool).
240 return Err(Error::eof().with_location(src.last_location()));
241 } else {
242 return Err(e);
243 }
244 }
245 };
246
247 // After finishing first document, peek ahead to detect either another document/content
248 // or trailing garbage. If a scan error occurs but we have seen a DocumentEnd ("..."),
249 // ignore the trailing garbage. Otherwise, surface the error.
250 match src.peek() {
251 Ok(Some(_)) => {
252 return Err(Error::msg(
253 "multiple YAML documents detected; use from_multiple or from_multiple_with_options",
254 )
255 .with_location(src.last_location()));
256 }
257 Ok(None) => {}
258 Err(e) => {
259 if src.seen_doc_end() {
260 // Trailing garbage after a proper document end marker is ignored.
261 } else {
262 return Err(e);
263 }
264 }
265 }
266
267 src.finish()?;
268 Ok(value)
269}
270
271/// Deserialize multiple YAML documents from a single string into a vector of `T`.
272/// Completely empty documents are ignored and not included into returned vector.
273///
274/// Example: read two `Config` documents separated by `---`.
275///
276/// ```rust
277/// use serde::Deserialize;
278///
279/// #[derive(Debug, Deserialize, PartialEq)]
280/// struct Config {
281/// name: String,
282/// enabled: bool,
283/// retries: i32,
284/// }
285///
286/// let yaml = r#"
287/// name: First
288/// enabled: true
289/// retries: 1
290/// ---
291/// name: Second
292/// enabled: false
293/// retries: 2
294/// "#;
295///
296/// let cfgs: Vec<Config> = serde_saphyr::from_multiple(yaml).unwrap();
297/// assert_eq!(cfgs.len(), 2);
298/// assert_eq!(cfgs[0].name, "First");
299/// ```
300pub fn from_multiple<T: DeserializeOwned>(input: &str) -> Result<Vec<T>, Error> {
301 from_multiple_with_options(input, Options::default())
302}
303
304/// Deserialize multiple YAML documents into a vector with configurable [`Options`].
305///
306/// Example: two `Config` documents with a custom budget.
307///
308/// ```rust
309/// use serde::Deserialize;
310/// use serde_saphyr::DuplicateKeyPolicy;
311///
312/// #[derive(Debug, Deserialize, PartialEq)]
313/// struct Config {
314/// name: String,
315/// enabled: bool,
316/// retries: i32,
317/// }
318///
319/// let yaml = r#"
320/// name: First
321/// enabled: true
322/// retries: 1
323/// ---
324/// name: Second
325/// enabled: false
326/// retries: 2
327/// "#;
328///
329/// let options = serde_saphyr::Options {
330/// budget: Some(serde_saphyr::Budget {
331/// max_anchors: 200,
332/// .. serde_saphyr::Budget::default()
333/// }),
334/// duplicate_keys: DuplicateKeyPolicy::FirstWins,
335/// .. serde_saphyr::Options::default()
336/// };
337/// let cfgs: Vec<Config> = serde_saphyr::from_multiple_with_options(yaml, options).unwrap();
338/// assert_eq!(cfgs.len(), 2);
339/// assert!(!cfgs[1].enabled);
340/// ```
341pub fn from_multiple_with_options<T: DeserializeOwned>(
342 input: &str,
343 options: Options,
344) -> Result<Vec<T>, Error> {
345 // Normalize: ignore a single leading UTF-8 BOM if present.
346 let input = if let Some(rest) = input.strip_prefix('\u{FEFF}') {
347 rest
348 } else {
349 input
350 };
351 let cfg = crate::de::Cfg::from_options(&options);
352 let mut src = LiveEvents::from_str(input, options.budget, options.alias_limits, false);
353 let mut values = Vec::new();
354
355 loop {
356 match src.peek()? {
357 // Skip documents that are explicit null-like scalars ("", "~", or "null").
358 Some(Ev::Scalar {
359 value: s, style, ..
360 }) if scalar_is_nullish(s, style) => {
361 let _ = src.next()?; // consume the null scalar document
362 // Do not push anything for this document; move to the next one.
363 continue;
364 }
365 Some(_) => {
366 let value = crate::anchor_store::with_document_scope(|| {
367 T::deserialize(crate::de::Deser::new(&mut src, cfg))
368 })?;
369 values.push(value);
370 }
371 None => break,
372 }
373 }
374
375 src.finish()?;
376 Ok(values)
377}
378
379/// Deserialize a single YAML document from a UTF-8 byte slice.
380///
381/// This is equivalent to [`from_str`], but accepts `&[u8]` and validates it is
382/// valid UTF-8 before parsing.
383///
384/// Example: read a small `Config` structure from bytes.
385///
386/// ```rust
387/// use serde::Deserialize;
388///
389/// #[derive(Debug, Deserialize, PartialEq)]
390/// struct Config {
391/// name: String,
392/// enabled: bool,
393/// retries: i32,
394/// }
395///
396/// let yaml = r#"
397/// name: My Application
398/// enabled: true
399/// retries: 5
400/// "#;
401/// let bytes = yaml.as_bytes();
402/// let cfg: Config = serde_saphyr::from_slice(bytes).unwrap();
403/// assert!(cfg.enabled);
404/// ```
405///
406pub fn from_slice<T: DeserializeOwned>(bytes: &[u8]) -> Result<T, Error> {
407 from_slice_with_options(bytes, Options::default())
408}
409
410/// Deserialize a single YAML document from a UTF-8 byte slice with configurable [`Options`].
411///
412/// Example: read a small `Config` with a custom budget from bytes.
413///
414/// ```rust
415/// use serde::Deserialize;
416/// use serde_saphyr::DuplicateKeyPolicy;
417///
418/// #[derive(Debug, Deserialize, PartialEq)]
419/// struct Config {
420/// name: String,
421/// enabled: bool,
422/// retries: i32,
423/// }
424///
425/// let yaml = r#"
426/// name: My Application
427/// enabled: true
428/// retries: 5
429/// "#;
430/// let bytes = yaml.as_bytes();
431/// let options = serde_saphyr::Options {
432/// budget: Some(serde_saphyr::Budget {
433/// max_anchors: 200,
434/// .. serde_saphyr::Budget::default()
435/// }),
436/// duplicate_keys: DuplicateKeyPolicy::FirstWins,
437/// .. serde_saphyr::Options::default()
438/// };
439/// let cfg: Config = serde_saphyr::from_slice_with_options(bytes, options).unwrap();
440/// assert_eq!(cfg.retries, 5);
441/// ```
442pub fn from_slice_with_options<T: DeserializeOwned>(
443 bytes: &[u8],
444 options: Options,
445) -> Result<T, Error> {
446 let s = std::str::from_utf8(bytes).map_err(|_| Error::msg("input is not valid UTF-8"))?;
447 from_str_with_options(s, options)
448}
449
450/// Deserialize multiple YAML documents from a UTF-8 byte slice into a vector of `T`.
451///
452/// Example: read two `Config` documents separated by `---` from bytes.
453///
454/// ```rust
455/// use serde::Deserialize;
456///
457/// #[derive(Debug, Deserialize, PartialEq)]
458/// struct Config {
459/// name: String,
460/// enabled: bool,
461/// retries: i32,
462/// }
463///
464/// let yaml = r#"
465/// name: First
466/// enabled: true
467/// retries: 1
468/// ---
469/// name: Second
470/// enabled: false
471/// retries: 2
472/// "#;
473/// let bytes = yaml.as_bytes();
474/// let cfgs: Vec<Config> = serde_saphyr::from_slice_multiple(bytes).unwrap();
475/// assert_eq!(cfgs.len(), 2);
476/// assert_eq!(cfgs[0].name, "First");
477/// ```
478pub fn from_slice_multiple<T: DeserializeOwned>(bytes: &[u8]) -> Result<Vec<T>, Error> {
479 from_slice_multiple_with_options(bytes, Options::default())
480}
481
482/// Deserialize multiple YAML documents from bytes with configurable [`Options`].
483/// Completely empty documents are ignored and not included into returned vector.
484///
485/// Example: two `Config` documents with a custom budget from bytes.
486///
487/// ```rust
488/// use serde::Deserialize;
489/// use serde_saphyr::DuplicateKeyPolicy;
490///
491/// #[derive(Debug, Deserialize, PartialEq)]
492/// struct Config {
493/// name: String,
494/// enabled: bool,
495/// retries: i32,
496/// }
497///
498/// let yaml = r#"
499/// name: First
500/// enabled: true
501/// retries: 1
502/// ---
503/// name: Second
504/// enabled: false
505/// retries: 2
506/// "#;
507/// let bytes = yaml.as_bytes();
508/// let options = serde_saphyr::Options {
509/// budget: Some(serde_saphyr::Budget {
510/// max_anchors: 200,
511/// .. serde_saphyr::Budget::default()
512/// }),
513/// duplicate_keys: DuplicateKeyPolicy::FirstWins,
514/// .. serde_saphyr::Options::default()
515/// };
516/// let cfgs: Vec<Config> = serde_saphyr::from_slice_multiple_with_options(bytes, options).unwrap();
517/// assert_eq!(cfgs.len(), 2);
518/// assert!(!cfgs[1].enabled);
519/// ```
520pub fn from_slice_multiple_with_options<T: DeserializeOwned>(
521 bytes: &[u8],
522 options: Options,
523) -> Result<Vec<T>, Error> {
524 let s = std::str::from_utf8(bytes).map_err(|_| Error::msg("input is not valid UTF-8"))?;
525 from_multiple_with_options(s, options)
526}
527
528/// Serialize multiple documents into a YAML string.
529///
530/// Serializes each value in the provided slice as an individual YAML document.
531/// Documents are separated by a standard YAML document start marker ("---\n").
532/// No marker is emitted before the first document.
533///
534/// Example
535///
536/// ```rust
537/// use serde::Serialize;
538///
539/// #[derive(Serialize)]
540/// struct Point { x: i32 }
541///
542/// let docs = vec![Point { x: 1 }, Point { x: 2 }];
543/// let out = serde_saphyr::to_string_multiple(&docs).unwrap();
544/// assert_eq!(out, "x: 1\n---\nx: 2\n");
545/// ```
546pub fn to_string_multiple<T: serde::Serialize>(
547 values: &[T],
548) -> std::result::Result<String, crate::ser::Error> {
549 let mut out = String::new();
550 let mut first = true;
551 for v in values {
552 if !first {
553 out.push_str("---\n");
554 }
555 first = false;
556 to_fmt_writer(&mut out, v)?;
557 }
558 Ok(out)
559}
560
561/// Deserialize a single YAML document from any `std::io::Read`.
562///
563/// The entire reader is read into memory (buffered) and then deserialized
564/// using the same logic as [`from_slice`]. This function is convenient when
565/// your YAML input comes from a file or any other IO stream.
566///
567/// Example
568///
569/// ```rust
570/// use serde::{Deserialize, Serialize};
571/// use std::collections::HashMap;
572/// use serde_json::Value;
573///
574/// #[derive(Debug, PartialEq, Serialize, Deserialize)]
575/// struct Point {
576/// x: i32,
577/// y: i32,
578/// }
579///
580/// let yaml = "x: 3\ny: 4\n";
581/// let reader = std::io::Cursor::new(yaml.as_bytes());
582/// let p: Point = serde_saphyr::from_reader(reader).unwrap();
583/// assert_eq!(p, Point { x: 3, y: 4 });
584///
585/// // It also works for dynamic values like serde_json::Value
586/// let mut big = String::new();
587/// let mut i = 0usize;
588/// while big.len() < 64 * 1024 { big.push_str(&format!("k{0}: v{0}\n", i)); i += 1; }
589/// let reader = std::io::Cursor::new(big.as_bytes().to_owned());
590/// let _value: Value = serde_saphyr::from_reader(reader).unwrap();
591/// ```
592pub fn from_reader<'a, R: std::io::Read + 'a, T: DeserializeOwned>(reader: R) -> Result<T, Error> {
593 from_reader_with_options(reader, Options::default())
594}
595
596/// Deserialize a single YAML document from any `std::io::Read` with configurable `Options`.
597///
598/// This is the reader-based counterpart to [`from_str_with_options`]. It consumes a
599/// byte-oriented reader, decodes it to UTF-8, and streams events into the deserializer.
600///
601/// Notes on limits and large inputs
602/// - Parsing limits: Use [`Options::budget`] to constrain YAML complexity (events, nodes,
603/// nesting depth, total scalar bytes, number of documents, anchors, aliases, etc.). These
604/// limits are enforced during parsing and are enabled by default via `Options::default()`.
605/// - Byte-level input cap: A hard cap on input bytes is enforced via `Options::budget.max_reader_input_bytes`.
606/// The default budget sets this to 256 MiB. You can override it by customizing `Options::budget`.
607/// When the cap is exceeded, deserialization fails early with a budget error.
608///
609/// Example: limit raw input bytes and customize options
610/// ```rust
611/// use std::io::{Read, Cursor};
612/// use serde::Deserialize;
613/// use serde_saphyr::{Budget, Options};
614///
615/// #[derive(Debug, Deserialize, PartialEq)]
616/// struct Point { x: i32, y: i32 }
617///
618/// let yaml = "x: 3\ny: 4\n";
619/// let reader = Cursor::new(yaml.as_bytes());
620///
621/// // Cap the reader to at most 1 KiB of input bytes.
622/// let capped = reader.take(1024);
623///
624/// // Tighten the parsing budget as well (optional).
625/// let mut opts = Options::default();
626/// opts.budget = Some(Budget { max_events: 10_000, ..Budget::default() });
627///
628/// let p: Point = serde_saphyr::from_reader_with_options(capped, opts).unwrap();
629/// assert_eq!(p, Point { x: 3, y: 4 });
630/// ```
631///
632/// Error behavior
633/// - If an empty document is provided (no content), a type-mismatch (eof) error is returned when
634/// attempting to deserialize into non-null-like targets.
635/// - If the reader contains multiple documents, an error is returned suggesting the
636/// `read`/`read_with_options` iterator APIs.
637/// - If `Options::budget` is set and a limit is exceeded, an error is returned early.
638pub fn from_reader_with_options<'a, R: std::io::Read + 'a, T: DeserializeOwned>(
639 reader: R,
640 options: Options,
641) -> Result<T, Error> {
642 let cfg = crate::de::Cfg::from_options(&options);
643 let mut src = LiveEvents::from_reader(
644 reader,
645 options.budget,
646 options.alias_limits,
647 false,
648 EnforcingPolicy::AllContent,
649 );
650 let value_res = crate::anchor_store::with_document_scope(|| {
651 T::deserialize(crate::de::Deser::new(&mut src, cfg))
652 });
653 let value = match value_res {
654 Ok(v) => v,
655 Err(e) => {
656 if src.synthesized_null_emitted() {
657 // If the only thing in the input was an empty document (synthetic null),
658 // surface this as an EOF error to preserve expected error semantics
659 // for incompatible target types (e.g., bool).
660 return Err(Error::eof().with_location(src.last_location()));
661 } else {
662 return Err(e);
663 }
664 }
665 };
666
667 // After finishing first document, peek ahead to detect either another document/content
668 // or trailing garbage. If a scan error occurs but we have seen a DocumentEnd ("..."),
669 // ignore the trailing garbage. Otherwise, surface the error.
670 match src.peek() {
671 Ok(Some(_)) => {
672 return Err(Error::msg(
673 "multiple YAML documents detected; use read or read_with_options to obtain the iterator",
674 )
675 .with_location(src.last_location()));
676 }
677 Ok(None) => {}
678 Err(e) => {
679 if src.seen_doc_end() {
680 // Trailing garbage after a proper document end marker is ignored.
681 } else {
682 return Err(e);
683 }
684 }
685 }
686
687 src.finish()?;
688 Ok(value)
689}
690
691/// Create an iterator over YAML documents from any `std::io::Read` using default options.
692///
693/// This is a convenience wrapper around [`read_with_options`], equivalent to
694/// `read_with_options(reader, Options::default())`.
695///
696/// - It streams the reader without loading the whole input into memory.
697/// - Each item produced by the returned iterator is one deserialized YAML document of type `T`.
698/// - Documents that are completely empty or null-like (e.g., `"", ~, null`) are skipped.
699///
700/// Generic parameters
701/// - `R`: the concrete reader type implementing [`std::io::Read`]. You almost never need to
702/// write this explicitly; the compiler will infer it from the `reader` you pass. When using
703/// turbofish, write `_` to let the compiler infer `R`.
704/// - `T`: the type to deserialize each YAML document into. Must implement [`serde::de::DeserializeOwned`].
705///
706/// Lifetimes
707/// - `'a`: the lifetime of the returned iterator, tied to the lifetime of the provided `reader`.
708/// The iterator cannot outlive the reader it was created from.
709///
710/// Limits and budget
711/// - Uses `Options::default()`, which enables a YAML parsing budget by default. This enforces
712/// limits such as maximum events, nodes, nesting depth, total scalar bytes
713/// Maximal input size limit is turned to off, as input for the streaming reader may
714/// potentially be indefinite. To customize these, call [`read_with_options`] and set
715/// `Options::budget.max_reader_input_bytes` in the provided `Options`.
716/// - Alias replay limits are also enforced with their default values to mitigate alias bombs.
717///
718/// ```rust
719/// use serde::Deserialize;
720///
721/// #[derive(Debug, Deserialize, PartialEq)]
722/// struct Simple { id: usize }
723///
724/// let yaml = b"id: 1\n---\nid: 2\n";
725/// let mut reader = std::io::Cursor::new(&yaml[..]);
726///
727/// // Type `T` is inferred from the collection target (Vec<Simple>).
728/// let values: Vec<Simple> = serde_saphyr::read(&mut reader)
729/// .map(|r| r.unwrap())
730/// .collect();
731/// assert_eq!(values.len(), 2);
732/// assert_eq!(values[0].id, 1);
733/// ```
734///
735/// Specifying only `T` with turbofish and letting `R` be inferred using `_`:
736/// ```rust
737/// use serde::Deserialize;
738///
739/// #[derive(Debug, Deserialize, PartialEq)]
740/// struct Simple { id: usize }
741///
742/// let yaml = b"id: 10\n---\nid: 20\n";
743/// let mut reader = std::io::Cursor::new(&yaml[..]);
744///
745/// // First turbofish parameter is R (reader type), `_` lets the compiler infer it.
746/// let iter = serde_saphyr::read::<_, Simple>(&mut reader);
747/// let ids: Vec<usize> = iter.map(|res| res.unwrap().id).collect();
748/// assert_eq!(ids, vec![10, 20]);
749/// ```
750///
751/// - Each `next()` yields either `Ok(T)` for a successfully deserialized document or `Err(Error)`
752/// if parsing fails or a limit is exceeded. After an error, the iterator ends.
753/// - Empty/null-like documents are skipped and produce no items.
754///
755/// *Note* Some content of the next document is read before the current parsed document is emitted.
756/// Hence, while streaming is good for safely parsing large files with multiple documents without
757/// loading it into RAM in advance, it does not emit each document exactly
758/// after `---` is encountered.
759pub fn read<'a, R, T>(reader: &'a mut R) -> Box<dyn Iterator<Item = Result<T, Error>> + 'a>
760where
761 R: Read + 'a,
762 T: DeserializeOwned + 'a,
763{
764 Box::new(read_with_options(
765 reader,
766 Options {
767 budget: Some(Budget {
768 max_reader_input_bytes: None,
769 ..Budget::default()
770 }),
771 ..Options::default()
772 },
773 ))
774}
775
776/// Create an iterator over YAML documents from any `std::io::Read`, with configurable options.
777///
778/// This is the multi-document counterpart to [`from_reader_with_options`]. It does not load
779/// the entire input into memory. Instead, it streams the reader, deserializing one document
780/// at a time into values of type `T`, yielding them through the returned iterator. Documents
781/// that are completely empty or null-like (e.g., `""`, `~`, or `null`) are skipped.
782///
783/// Generic parameters
784/// - `R`: the concrete reader type that implements [`std::io::Read`]. You rarely need to spell
785/// this out; it is almost always inferred from the `reader` value you pass in. When using
786/// turbofish, you can write `_` for this parameter to let the compiler infer it.
787/// - `T`: the type to deserialize each YAML document into. This must implement [`serde::de::DeserializeOwned`].
788///
789/// Lifetimes
790/// - `'a`: the lifetime of the returned iterator. It is tied to the lifetime of the provided
791/// `reader` value because the iterator borrows internal state that references the reader.
792/// In practice, this means the iterator cannot outlive the reader it was created from.
793///
794/// Limits and budget
795/// - All parsing limits configured via [`Options::budget`] (such as maximum events, nodes,
796/// nesting depth, total scalar bytes) are enforced while streaming. A hard input-byte cap
797/// is also enforced via `Budget::max_reader_input_bytes` (256 MiB by default), set this
798/// to None if you need a streamer to exist for arbitrary long time.
799/// - Alias replay limits from [`Options::alias_limits`] are also enforced to mitigate alias bombs.
800///
801/// ```rust
802/// use serde::Deserialize;
803///
804/// #[derive(Debug, Deserialize, PartialEq)]
805/// struct Simple { id: usize }
806///
807/// let yaml = b"id: 1\n---\nid: 2\n";
808/// let mut reader = std::io::Cursor::new(&yaml[..]);
809///
810/// // Type `T` is inferred from the collection target (Vec<Simple>).
811/// let values: Vec<Simple> = serde_saphyr::read(&mut reader)
812/// .map(|r| r.unwrap())
813/// .collect();
814/// assert_eq!(values.len(), 2);
815/// assert_eq!(values[0].id, 1);
816/// ```
817///
818/// Specifying only `T` with turbofish and letting `R` be inferred using `_`:
819/// ```rust
820/// use serde::Deserialize;
821///
822/// #[derive(Debug, Deserialize, PartialEq)]
823/// struct Simple { id: usize }
824///
825/// let yaml = b"id: 10\n---\nid: 20\n";
826/// let mut reader = std::io::Cursor::new(&yaml[..]);
827///
828/// // First turbofish parameter is R (reader type) which we let the compiler infer via `_`.
829/// let iter = serde_saphyr::read_with_options::<_, Simple>(&mut reader, serde_saphyr::Options::default());
830/// let ids: Vec<usize> = iter.map(|res| res.unwrap().id).collect();
831/// assert_eq!(ids, vec![10, 20]);
832/// ```
833///
834/// - Each `next()` yields either `Ok(T)` for a successfully deserialized document or `Err(Error)`
835/// if parsing fails or a budget/alias limit is exceeded. After an error, the iterator ends.
836/// - Empty/null-like documents are skipped and produce no items.
837pub fn read_with_options<'a, R, T>(
838 reader: &'a mut R, // iterator must not outlive this borrow
839 options: Options,
840) -> impl Iterator<Item = Result<T, Error>> + 'a
841where
842 R: Read + 'a,
843 T: DeserializeOwned + 'a,
844{
845 struct ReadIter<'a, T> {
846 src: LiveEvents<'a>, // borrows from `reader`
847 cfg: crate::de::Cfg,
848 finished: bool,
849 _marker: std::marker::PhantomData<T>,
850 }
851
852 impl<'a, T> Iterator for ReadIter<'a, T>
853 where
854 T: DeserializeOwned + 'a,
855 {
856 type Item = Result<T, Error>;
857
858 fn next(&mut self) -> Option<Self::Item> {
859 if self.finished {
860 return None;
861 }
862 loop {
863 match self.src.peek() {
864 Ok(Some(Ev::Scalar { value, style, .. }))
865 if scalar_is_nullish(value, style) =>
866 {
867 let _ = self.src.next();
868 continue;
869 }
870 Ok(Some(_)) => {
871 let res = crate::anchor_store::with_document_scope(|| {
872 T::deserialize(crate::de::Deser::new(&mut self.src, self.cfg))
873 });
874 return Some(res);
875 }
876 Ok(None) => {
877 self.finished = true;
878 if let Err(e) = self.src.finish() {
879 return Some(Err(e));
880 }
881 return None;
882 }
883 Err(e) => {
884 self.finished = true;
885 let _ = self.src.finish();
886 return Some(Err(e));
887 }
888 }
889 }
890 }
891 }
892
893 let cfg = crate::de::Cfg::from_options(&options);
894 let src = LiveEvents::from_reader(
895 reader,
896 options.budget,
897 options.alias_limits,
898 false,
899 EnforcingPolicy::PerDocument,
900 );
901
902 ReadIter::<T> {
903 src,
904 cfg,
905 finished: false,
906 _marker: std::marker::PhantomData,
907 }
908}