xt/
yaml.rs

1//! The YAML data format.
2
3use std::io::{self, BufRead, BufReader, Write};
4use std::str;
5
6use serde::{de, ser};
7
8use crate::input::{self, Input, Ref};
9use crate::transcode;
10
11mod chunker;
12mod encoding;
13
14use self::chunker::Chunker;
15use self::encoding::{Encoder, Encoding};
16
17pub(crate) fn input_matches(mut input: Ref) -> io::Result<bool> {
18	// YAML can be surprisingly liberal in what it accepts. In particular, the
19	// contents of many non-YAML text documents can actually be parsed as YAML
20	// scalars, such as TOML documents that do not start with a table. To
21	// prevent these kinds of weird matches, we only detect input as YAML when
22	// the first document in the stream encodes a collection (map or sequence).
23	let encoding = Encoding::detect(input.prefix(Encoding::DETECT_LEN)?);
24	let chunk = match &mut input {
25		Ref::Slice(b) => Chunker::new(Encoder::new(b, encoding)).next(),
26		Ref::Reader(r) => Chunker::new(Encoder::new(BufReader::new(r), encoding)).next(),
27	};
28	match chunk {
29		Some(Ok(doc)) => Ok(doc.is_collection()),
30		Some(Err(err)) if err.kind() == io::ErrorKind::InvalidData => Ok(false),
31		Some(Err(err)) => Err(err),
32		None => Ok(false),
33	}
34}
35
36pub(crate) fn transcode<O>(input: input::Handle, mut output: O) -> crate::Result<()>
37where
38	O: crate::Output,
39{
40	match input.into() {
41		Input::Reader(r) => transcode_reader(BufReader::new(r), output),
42		Input::Slice(b) => match str::from_utf8(&b) {
43			Ok(s) => {
44				for de in serde_yaml::Deserializer::from_str(s) {
45					output.transcode_from(de)?;
46				}
47				Ok(())
48			}
49			Err(_) => {
50				// The reader path supports automatic re-encoding of UTF-16 and
51				// UTF-32 input. See transcode_reader for details.
52				transcode_reader(&*b, output)
53			}
54		},
55	}
56}
57
58fn transcode_reader<R, O>(input: R, mut output: O) -> crate::Result<()>
59where
60	R: BufRead,
61	O: crate::Output,
62{
63	// serde_yaml imposes a couple of interesting limitations on us, which
64	// aren't clear from the documentation alone but are reflected in this
65	// usage.
66	//
67	// First, while serde_yaml supports creating a Deserializer from a reader,
68	// this actually slurps the entire input into a buffer for parsing. We
69	// support streaming parsing by implementing our own "chunker" that splits
70	// an unbounded YAML stream into a sequence of buffered documents. This is a
71	// terrible hack, and I sincerely hope that I will have the time and energy
72	// someday to implement true streaming support in serde_yaml.
73	//
74	// Second, serde_yaml does not support UTF-16 or UTF-32 input, even though
75	// YAML 1.2 requires this. In addition to our chunker, we implement a
76	// streaming encoder that can detect the encoding of any valid YAML stream
77	// and convert it to UTF-8. The encoder will also strip any byte order mark
78	// from the beginning of the stream, as serde_yaml will choke on it. This
79	// still doesn't cover the full YAML spec, which also allows BOMs in UTF-8
80	// streams and at the starts of individual documents in the stream.
81	// However, these cases should be much rarer than that of a single BOM at
82	// the start of a UTF-16 or UTF-32 stream.
83	for doc in Chunker::new(Encoder::from_reader(input)?) {
84		let doc = doc?;
85		let de = serde_yaml::Deserializer::from_str(doc.content());
86		output.transcode_from(de)?;
87	}
88	Ok(())
89}
90
/// YAML output that wraps an underlying writer.
pub(crate) struct Output<W: Write>(W);

impl<W: Write> Output<W> {
	/// Wraps `w` in a new YAML output.
	pub(crate) fn new(w: W) -> Self {
		Self(w)
	}
}
98
99impl<W: Write> crate::Output for Output<W> {
100	fn transcode_from<'de, D, E>(&mut self, de: D) -> crate::Result<()>
101	where
102		D: de::Deserializer<'de, Error = E>,
103		E: de::Error + Send + Sync + 'static,
104	{
105		writeln!(&mut self.0, "---")?;
106		let mut ser = serde_yaml::Serializer::new(&mut self.0);
107		transcode::transcode(&mut ser, de)?;
108		Ok(())
109	}
110
111	fn transcode_value<S>(&mut self, value: S) -> crate::Result<()>
112	where
113		S: ser::Serialize,
114	{
115		writeln!(&mut self.0, "---")?;
116		serde_yaml::to_writer(&mut self.0, &value)?;
117		Ok(())
118	}
119
120	fn flush(&mut self) -> io::Result<()> {
121		self.0.flush()
122	}
123}