//! Translations between serialized data formats.
//!
//! **xt is pre-1.0 software with an unstable library API!**
//!
//! To convert between serialized data formats in Rust code, consider the
//! mature, stable, and widely-used [`serde_transcode`][serde-transcode] crate
//! instead.
//!
//! [serde-transcode]: https://docs.rs/serde-transcode

#![deny(
	// Enforce some additional strictness on unsafe code.
	unsafe_op_in_unsafe_fn,
	clippy::undocumented_unsafe_blocks,
	// Deny a number of `as` casts in favor of safer alternatives.
	clippy::as_underscore,
	clippy::ptr_as_ptr,
	clippy::cast_lossless,
	clippy::cast_possible_truncation,
	clippy::checked_conversions,
	clippy::unnecessary_cast,
	// More general style-type things.
	clippy::from_over_into,
	clippy::semicolon_if_nothing_returned,
)]
#![warn(
	// Print macros can panic, and should only be for temporary debugging.
	clippy::print_stderr,
	clippy::print_stdout,
	// The following macros represent incomplete implementation work.
	clippy::todo,
	clippy::unimplemented,
)]

use std::fmt;
use std::io::{self, Read, Write};

use serde::{de, ser};

mod detect;
mod error;
mod input;
mod json;
mod msgpack;
mod toml;
mod transcode;
mod yaml;

pub use error::{Error, Result};

/// Translates one in-memory input to the `to` format, writing to `output`.
///
/// This is a one-shot convenience wrapper: it creates a [`Translator`] for
/// `output` and `to`, then hands it `input` and `from`. When `from` is `None`,
/// the input format is detected from the input itself, and translation fails
/// if no format can be detected.
///
/// See [`Translator::translate_slice`] for the trade-offs of slice input.
pub fn translate_slice<W>(input: &[u8], from: Option<Format>, to: Format, output: W) -> Result<()>
where
	W: Write,
{
	let mut translator = Translator::new(output, to);
	translator.translate_slice(input, from)
}

/// Translates everything read from `input` to the `to` format, writing to
/// `output`.
///
/// This is a one-shot convenience wrapper: it creates a [`Translator`] for
/// `output` and `to`, then hands it `input` and `from`. When `from` is `None`,
/// the input format is detected from the input itself, and translation fails
/// if no format can be detected.
///
/// See [`Translator::translate_reader`] for the trade-offs of reader input.
pub fn translate_reader<R, W>(input: R, from: Option<Format>, to: Format, output: W) -> Result<()>
where
	R: Read,
	W: Write,
{
	let mut translator = Translator::new(output, to);
	translator.translate_reader(input, from)
}

/// Translates multiple inputs to a single serialized output.
///
/// A `Translator` accepts both slice and reader input. See
/// [`Translator::translate_slice`] and [`Translator::translate_reader`] for
/// the considerations associated with each kind of source.
///
/// The output format is fixed when the translator is created with
/// [`Translator::new`]. When a `Translator` is used more than once to
/// translate different inputs, it outputs the logical concatenation of all
/// documents from all inputs as if they had been presented in a single input.
/// When translating to a format without multi-document support, translation
/// will fail if the translator encounters more than one document in the first
/// input, or if the translator is called a second time with another input.
///
/// Use [`Translator::flush`] to flush the underlying writer.
// The single field is the format-specific output that every translation
// writes into.
pub struct Translator<W>(Dispatcher<W>)
where
	W: Write;

impl<W> Translator<W>
where
	W: Write,
{
	/// Builds a translator that writes `to`-formatted output to `output`.
	pub fn new(output: W, to: Format) -> Translator<W> {
		let dispatcher = Dispatcher::new(output, to);
		Translator(dispatcher)
	}

	/// Translates a single in-memory input to the translator's output format.
	///
	/// Slice inputs are typically more efficient to translate than reader
	/// inputs, but the whole input must already be in memory. For unbounded
	/// streams such as standard input or non-regular files, prefer
	/// [`Self::translate_reader`] over manually reading the entire stream into
	/// a buffer first.
	///
	/// When `from` is `None`, xt will attempt to detect the input format from
	/// the input itself.
	pub fn translate_slice(&mut self, input: &[u8], from: Option<Format>) -> Result<()> {
		let handle = input::Handle::from_slice(input);
		self.translate(handle, from)
	}

	/// Translates everything read from a single reader to the translator's
	/// output format.
	///
	/// Reader inputs enable streaming translation for most formats: documents
	/// are translated as they appear in the stream, without buffering more
	/// than one document in memory at a time. For a source format that does
	/// not support streaming, xt automatically reads the entire input into
	/// memory before it starts translating.
	///
	/// When `from` is `None`, xt will attempt to detect the input format from
	/// the input itself. The current detection implementation fully reads the
	/// contents of a single document into memory before translation starts.
	pub fn translate_reader<R>(&mut self, input: R, from: Option<Format>) -> Result<()>
	where
		R: Read,
	{
		let handle = input::Handle::from_reader(input);
		self.translate(handle, from)
	}

	/// Translates a single serialized input to the output format.
	///
	/// An explicit `from` always wins. Otherwise the format is detected from
	/// `input`, and an error is returned when detection yields nothing.
	fn translate(&mut self, mut input: input::Handle<'_>, from: Option<Format>) -> Result<()> {
		let from = if let Some(format) = from {
			format
		} else if let Some(format) = detect::detect_format(&mut input)? {
			format
		} else {
			return Err("unable to detect input format".into());
		};

		// Every input format writes into the same output dispatcher.
		let output = &mut self.0;
		match from {
			Format::Json => json::transcode(input, output),
			Format::Yaml => yaml::transcode(input, output),
			Format::Toml => toml::transcode(input, output),
			Format::Msgpack => msgpack::transcode(input, output),
		}
	}

	/// [Flushes](Write::flush) the underlying writer.
	pub fn flush(&mut self) -> io::Result<()> {
		// `Output` is implemented for `&mut Dispatcher<W>`, so the call goes
		// through a mutable binding that holds that reference.
		let mut output = &mut self.0;
		output.flush()
	}
}

/// A trait for output formats to receive their translatable input.
///
/// Input can arrive in two shapes: as a serde deserializer
/// ([`Output::transcode_from`]) or as a serializable value
/// ([`Output::transcode_value`]). In this file the trait is implemented for
/// `&mut Dispatcher<W>`, which forwards every call to the output of the
/// selected format.
trait Output {
	/// Receives input in the form of a serde deserializer.
	///
	/// The deserializer's error type `E` must be `Send + Sync + 'static`.
	// NOTE(review): presumably these bounds let the error be wrapped in the
	// crate's own error type; confirm against the `error` module.
	fn transcode_from<'de, D, E>(&mut self, de: D) -> Result<()>
	where
		D: de::Deserializer<'de, Error = E>,
		E: de::Error + Send + Sync + 'static;

	/// Receives input in the form of a value that implements
	/// [`ser::Serialize`].
	fn transcode_value<S>(&mut self, value: S) -> Result<()>
	where
		S: ser::Serialize;

	/// Flushes the output, reporting any I/O error.
	///
	/// `Translator::flush` calls this to flush the underlying writer.
	fn flush(&mut self) -> io::Result<()>;
}

/// An [`Output`] supporting static dispatch based on a known output format.
///
/// Each variant wraps the output type of one format module and corresponds to
/// the [`Format`] variant of the same name. Note that [`Output`] is
/// implemented for `&mut Dispatcher<W>` rather than for `Dispatcher<W>`
/// itself.
enum Dispatcher<W>
where
	W: Write,
{
	/// Output for [`Format::Json`].
	Json(json::Output<W>),
	/// Output for [`Format::Yaml`].
	Yaml(yaml::Output<W>),
	/// Output for [`Format::Toml`].
	Toml(toml::Output<W>),
	/// Output for [`Format::Msgpack`].
	Msgpack(msgpack::Output<W>),
}

impl<W> Dispatcher<W>
where
	W: Write,
{
	/// Wraps `writer` in the output type of the format selected by `to`.
	fn new(writer: W, to: Format) -> Dispatcher<W> {
		match to {
			Format::Json => Self::Json(json::Output::new(writer)),
			Format::Yaml => Self::Yaml(yaml::Output::new(writer)),
			Format::Toml => Self::Toml(toml::Output::new(writer)),
			Format::Msgpack => Self::Msgpack(msgpack::Output::new(writer)),
		}
	}
}

// Every method below forwards to the output wrapped by the active variant.
// `self` is a `&mut &mut Dispatcher<W>` here, so each match reborrows the
// dispatcher explicitly before inspecting it.
impl<W> Output for &mut Dispatcher<W>
where
	W: Write,
{
	/// Forwards the deserializer to the output of the selected format.
	fn transcode_from<'de, D, E>(&mut self, de: D) -> Result<()>
	where
		D: de::Deserializer<'de, Error = E>,
		E: de::Error + Send + Sync + 'static,
	{
		match &mut **self {
			Dispatcher::Json(inner) => inner.transcode_from(de),
			Dispatcher::Yaml(inner) => inner.transcode_from(de),
			Dispatcher::Toml(inner) => inner.transcode_from(de),
			Dispatcher::Msgpack(inner) => inner.transcode_from(de),
		}
	}

	/// Forwards the serializable value to the output of the selected format.
	fn transcode_value<S>(&mut self, value: S) -> Result<()>
	where
		S: ser::Serialize,
	{
		match &mut **self {
			Dispatcher::Json(inner) => inner.transcode_value(value),
			Dispatcher::Yaml(inner) => inner.transcode_value(value),
			Dispatcher::Toml(inner) => inner.transcode_value(value),
			Dispatcher::Msgpack(inner) => inner.transcode_value(value),
		}
	}

	/// Flushes the output of the selected format.
	fn flush(&mut self) -> io::Result<()> {
		match &mut **self {
			Dispatcher::Json(inner) => inner.flush(),
			Dispatcher::Yaml(inner) => inner.flush(),
			Dispatcher::Toml(inner) => inner.flush(),
			Dispatcher::Msgpack(inner) => inner.flush(),
		}
	}
}

/// The set of input and output formats supported by xt.
///
/// Support for each format comes largely from external crates, with some
/// additional preprocessing by xt for select formats. The crate selection for
/// each format is **not stable**, and is documented for informational purposes
/// only.
///
/// `Format` is a plain, field-less enum, so it implements the common value
/// traits: it can be copied, debug-printed, compared for equality, and used as
/// a key in hashed collections. Its [`Display`](std::fmt::Display) output is
/// the human-readable name of the format.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum Format {
	/// The [JSON][json] format as interpreted by [`serde_json`].
	///
	/// This format supports multi-document translation and streaming input.
	///
	/// [json]: https://datatracker.ietf.org/doc/html/rfc8259
	Json,
	/// The [YAML 1.2][yaml] format as interpreted by [`serde_yaml`].
	///
	/// This format supports multi-document translation and streaming input.
	///
	/// [yaml]: https://yaml.org/spec/1.2.2/
	Yaml,
	/// The [TOML][toml] format as interpreted by [`toml`][::toml].
	///
	/// This format supports single-document translation only, and as such does
	/// not support streaming input.
	///
	/// [toml]: https://github.com/toml-lang/toml
	Toml,
	/// The [MessagePack][msgpack] format as interpreted by [`rmp_serde`].
	///
	/// This format supports multi-document translation and streaming input.
	///
	/// [msgpack]: https://msgpack.org/
	Msgpack,
}

impl fmt::Display for Format {
	/// Writes the human-readable name of the format (for example `JSON` or
	/// `MessagePack`).
	///
	/// The name is written with [`fmt::Formatter::pad`] rather than
	/// `write_str`, so width, fill, alignment, and precision flags such as
	/// `{:>12}` are honored exactly as they are for a `&str`. Plain `{}`
	/// output is unchanged.
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
		let name = match self {
			Self::Json => "JSON",
			Self::Yaml => "YAML",
			Self::Toml => "TOML",
			Self::Msgpack => "MessagePack",
		};
		f.pad(name)
	}
}