1use std::fmt;
5use std::io::Read;
6
7#[cfg(feature = "serde")]
8use serde::de::DeserializeOwned;
9
10use crate::error::Result;
11use crate::format::Format;
12use crate::value::Value;
13
14pub struct Decoder<R> {
35 reader: R,
36 buffer: Option<Vec<u8>>,
37 format: Option<Format>,
38}
39
40impl<R: Read> Decoder<R> {
41 pub const fn new(reader: R) -> Self {
44 Self {
45 reader,
46 buffer: None,
47 format: None,
48 }
49 }
50
51 #[must_use]
58 pub const fn format(&self) -> Option<Format> {
59 self.format
60 }
61
62 pub fn decode_value(&mut self) -> Result<Value> {
75 let (value, format) = parse_auto(self.buffered()?)?;
76 self.format = Some(format);
77 Ok(value)
78 }
79
80 #[cfg(feature = "serde")]
102 pub fn decode<T: DeserializeOwned>(&mut self) -> Result<T> {
103 let (value, format) = parse_auto(self.buffered()?)?;
104 self.format = Some(format);
105 let lax = format == Format::OpenStep;
106 T::deserialize(crate::value::de::ValueDeserializer::new(value, lax))
107 }
108
109 fn buffered(&mut self) -> Result<&[u8]> {
113 if self.buffer.is_none() {
114 let mut data = Vec::new();
115 let _ = self.reader.read_to_end(&mut data)?;
116 self.buffer = Some(data);
117 }
118 Ok(self.buffer.as_deref().unwrap_or_default())
119 }
120}
121
122impl<R> fmt::Debug for Decoder<R> {
123 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
124 f.debug_struct("Decoder")
125 .field("format", &self.format)
126 .finish_non_exhaustive()
127 }
128}
129
130pub(crate) fn parse_auto(bytes: &[u8]) -> Result<(Value, Format)> {
135 if bytes.starts_with(b"bplist") {
136 return binary_rung(bytes);
137 }
138 let retry = match xml_rung(bytes) {
139 Ok(value) => return Ok((value, Format::Xml)),
140 Err(error) if error.is_retry_signal() => error,
141 Err(error) => return Err(error),
142 };
143 text_rung(bytes, retry)
144}
145
146#[cfg(feature = "binary")]
147fn binary_rung(bytes: &[u8]) -> Result<(Value, Format)> {
148 crate::binary::parser::parse(bytes).map(|value| (value, Format::Binary))
149}
150
151#[cfg(not(feature = "binary"))]
152fn binary_rung(_bytes: &[u8]) -> Result<(Value, Format)> {
153 Err(crate::error::Error::invalid("binary"))
155}
156
157#[cfg(feature = "xml")]
158fn xml_rung(bytes: &[u8]) -> Result<Value> {
159 crate::xml::parser::parse(bytes)
160}
161
162#[cfg(not(feature = "xml"))]
163fn xml_rung(_bytes: &[u8]) -> Result<Value> {
164 Err(crate::error::Error::invalid("XML"))
166}
167
168#[cfg(feature = "openstep")]
169fn text_rung(bytes: &[u8], _xml_failure: crate::error::Error) -> Result<(Value, Format)> {
170 crate::text::parse(bytes)
171}
172
173#[cfg(not(feature = "openstep"))]
174fn text_rung(_bytes: &[u8], xml_failure: crate::error::Error) -> Result<(Value, Format)> {
175 if cfg!(feature = "xml") {
176 Err(xml_failure)
177 } else {
178 Err(crate::error::Error::FeatureDisabled {
179 format: Format::Xml,
180 })
181 }
182}
183
184#[must_use]
204pub fn detect(data: &[u8]) -> Option<Format> {
205 parse_auto(data).ok().map(|(_, format)| format)
206}
207
/// Deserializes a `T` from an in-memory property list, detecting the format
/// automatically.
///
/// # Errors
///
/// Returns whatever error [`Decoder::decode`] reports for this input.
#[cfg(feature = "serde")]
pub fn from_slice<T: DeserializeOwned>(data: &[u8]) -> Result<T> {
    let mut decoder = Decoder::new(data);
    decoder.decode()
}
229
/// Deserializes a `T` from a property list supplied by `reader`, detecting
/// the format automatically.
///
/// # Errors
///
/// Returns whatever error [`Decoder::decode`] reports, including I/O failures
/// raised while draining `reader`.
#[cfg(feature = "serde")]
pub fn from_reader<R: Read, T: DeserializeOwned>(reader: R) -> Result<T> {
    let mut decoder = Decoder::new(reader);
    decoder.decode()
}
252
#[cfg(test)]
mod tests {
    use super::*;

    // A fresh decoder renders with its type name and an unset (`None`) format.
    #[test]
    fn debug_elides_the_reader() {
        let decoder = Decoder::new(&b""[..]);
        let rendered = format!("{decoder:?}");
        assert!(rendered.starts_with("Decoder"));
        assert!(rendered.contains("None"));
    }

    // These tests exercise every rung, so they need all three format features.
    #[cfg(all(feature = "xml", feature = "binary", feature = "openstep"))]
    mod full_ladder {
        #![expect(clippy::unwrap_used, reason = "test code: unwrap is the assertion")]

        use std::collections::BTreeMap;

        use super::*;
        use crate::error::Error;
        use crate::value::{Dictionary, Integer};

        // Binary fixture; the tests below assert it decodes to the string
        // "Hello" with `Format::Binary`.
        const HELLO_BPLIST: &[u8] = b"bplist00UHello\x08\
            \x00\x00\x00\x00\x00\x00\x01\x01\
            \x00\x00\x00\x00\x00\x00\x00\x01\
            \x00\x00\x00\x00\x00\x00\x00\x00\
            \x00\x00\x00\x00\x00\x00\x00\x0e";

        // Decodes `data` with a fresh decoder and returns both the outcome and
        // the format the decoder recorded.
        fn decode_bytes(data: &[u8]) -> (Result<Value>, Option<Format>) {
            let mut decoder = Decoder::new(data);
            let result = decoder.decode_value();
            (result, decoder.format())
        }

        #[test]
        fn format_detection_table_is_correct() {
            // Well-formed binary document.
            let (value, format) = decode_bytes(HELLO_BPLIST);
            assert_eq!(value.unwrap(), Value::String("Hello".into()));
            assert_eq!(format, Some(Format::Binary));

            // XML string whose content merely looks like a typed text literal.
            let (value, format) = decode_bytes(b"<string><*I3></string>");
            assert_eq!(value.unwrap(), Value::String("<*I3>".into()));
            assert_eq!(format, Some(Format::Xml));

            // Binary magic with nothing after it: a binary error, no format.
            let (value, format) = decode_bytes(b"bplist00");
            assert!(matches!(
                value,
                Err(Error::Parse {
                    format: "binary",
                    ..
                })
            ));
            assert_eq!(format, None);

            // Plain text array: every element comes back as a string.
            let (value, format) = decode_bytes(b"(1,2,3,4,5)");
            assert_eq!(
                value.unwrap(),
                Value::Array(
                    ["1", "2", "3", "4", "5"]
                        .map(|s| Value::String(s.into()))
                        .to_vec()
                )
            );
            assert_eq!(format, Some(Format::OpenStep));

            // Angle-bracketed hex is text-format data, not XML.
            let (value, format) = decode_bytes(b"<abab>");
            assert_eq!(value.unwrap(), Value::Data(vec![0xAB, 0xAB]));
            assert_eq!(format, Some(Format::OpenStep));

            // A typed `<*I3>` literal yields an integer and the GnuStep format.
            let (value, format) = decode_bytes(b"(1,2,<*I3>)");
            assert_eq!(
                value.unwrap(),
                Value::Array(vec![
                    Value::String("1".into()),
                    Value::String("2".into()),
                    Value::Integer(Integer::Signed(3)),
                ])
            );
            assert_eq!(format, Some(Format::GnuStep));

            // A lone NUL byte ends up as a text-rung error.
            let (value, format) = decode_bytes(b"\x00");
            assert!(matches!(value, Err(Error::Parse { format: "text", .. })));
            assert_eq!(format, None);
        }

        // `detect` must give the same answers as decoding the same inputs.
        #[test]
        fn detect_agrees_with_the_ladder() {
            assert_eq!(detect(HELLO_BPLIST), Some(Format::Binary));
            assert_eq!(detect(b"<string><*I3></string>"), Some(Format::Xml));
            assert_eq!(detect(b"bplist00"), None);
            assert_eq!(detect(b"(1,2,3,4,5)"), Some(Format::OpenStep));
            assert_eq!(detect(b"<abab>"), Some(Format::OpenStep));
            assert_eq!(detect(b"(1,2,<*I3>)"), Some(Format::GnuStep));
            assert_eq!(detect(b"\x00"), None);
            assert_eq!(detect(b""), Some(Format::OpenStep));
        }

        #[test]
        fn empty_whitespace_and_comment_only_input_is_an_empty_dictionary() {
            for input in [&b""[..], b" \n\t", b"// hi", b"/* hi */"] {
                let (value, format) = decode_bytes(input);
                assert_eq!(value.unwrap(), Value::Dictionary(Dictionary::new()));
                assert_eq!(format, Some(Format::OpenStep));
            }
        }

        #[test]
        fn short_non_magic_prefixes_never_sniff_as_binary() {
            // One byte short of the magic: parsed as a bare text string.
            let (value, format) = decode_bytes(b"bplis");
            assert_eq!(value.unwrap(), Value::String("bplis".into()));
            assert_eq!(format, Some(Format::OpenStep));

            // Exactly the six-byte magic: routed to the binary rung, which fails.
            let (value, format) = decode_bytes(b"bplist");
            assert!(matches!(
                value,
                Err(Error::Parse {
                    format: "binary",
                    ..
                })
            ));
            assert_eq!(format, None);

            // Text that happens to start with the magic still goes to the
            // binary rung and is not retried as text.
            let (value, _) = decode_bytes(b"bplistish = x;");
            assert!(matches!(
                value,
                Err(Error::Parse {
                    format: "binary",
                    ..
                })
            ));
        }

        #[test]
        fn xml_hard_errors_do_not_retry_as_text() {
            let (value, format) = decode_bytes(b"<plist/>");
            assert!(matches!(value, Err(Error::Parse { format: "XML", .. })));
            assert_eq!(format, None);

            let (value, _) = decode_bytes(b"<plist>");
            assert!(matches!(value, Err(Error::Parse { format: "XML", .. })));
        }

        #[test]
        fn xml_depth_overrun_is_fatal_without_text_retry() {
            // 200 unclosed, nested `<array>` elements.
            let mut doc = Vec::new();
            for _ in 0..200 {
                doc.extend_from_slice(b"<array>");
            }
            let (value, format) = decode_bytes(&doc);
            assert!(matches!(value, Err(Error::MaxDepthExceeded)));
            assert_eq!(format, None);
        }

        #[test]
        fn when_xml_retries_and_text_fails_the_text_error_surfaces() {
            // Unterminated text dictionary: the reported error is the text one.
            let (value, format) = decode_bytes(b"{ a = ");
            assert!(matches!(value, Err(Error::Parse { format: "text", .. })));
            assert_eq!(format, None);
        }

        #[test]
        fn bom_matrix_follows_the_ladder() {
            // UTF-8 BOM (EF BB BF) in front of an XML document.
            let (value, format) = decode_bytes(b"\xEF\xBB\xBF<string>x</string>");
            assert_eq!(value.unwrap(), Value::String("x".into()));
            assert_eq!(format, Some(Format::Xml));

            // UTF-8 BOM in front of a text dictionary.
            let (value, format) = decode_bytes(b"\xEF\xBB\xBF{a=b;}");
            assert_eq!(
                value.unwrap(),
                Value::Dictionary(Dictionary::from([(
                    "a".to_owned(),
                    Value::String("b".into()),
                )]))
            );
            assert_eq!(format, Some(Format::OpenStep));

            // With a BOM in front, the magic is no longer a prefix, so the
            // input must not be reported as binary.
            let mut bom_bplist = b"\xEF\xBB\xBF".to_vec();
            bom_bplist.extend_from_slice(HELLO_BPLIST);
            assert_ne!(detect(&bom_bplist), Some(Format::Binary));
        }

        #[test]
        fn repeated_decodes_are_idempotent_for_every_format() {
            let documents: [&[u8]; 4] =
                [HELLO_BPLIST, b"{a=b;}", b"(<*I1>)", b"<string>x</string>"];
            for document in documents {
                // Decoding twice from one decoder yields the same value and format.
                let mut decoder = Decoder::new(document);
                let first = decoder.decode_value().unwrap();
                let first_format = decoder.format();
                let second = decoder.decode_value().unwrap();
                assert_eq!(first, second);
                assert_eq!(decoder.format(), first_format);
            }
        }

        #[test]
        fn parse_failures_leave_the_previous_format_in_place() {
            // The format starts as `None` and stays `None` across failures.
            let mut decoder = Decoder::new(&b"bplist00"[..]);
            assert!(decoder.decode_value().is_err());
            assert_eq!(decoder.format(), None);
            assert!(decoder.decode_value().is_err());
            assert_eq!(decoder.format(), None);
        }

        #[test]
        fn io_failures_surface_and_a_later_call_retries_the_reader() {
            // Fails the first `read` call, then reports end-of-stream.
            struct FlakyReader {
                attempts: usize,
            }
            impl Read for FlakyReader {
                fn read(&mut self, _buf: &mut [u8]) -> std::io::Result<usize> {
                    self.attempts += 1;
                    if self.attempts == 1 {
                        Err(std::io::Error::other("transient"))
                    } else {
                        Ok(0)
                    }
                }
            }
            let mut decoder = Decoder::new(FlakyReader { attempts: 0 });
            assert!(matches!(decoder.decode_value(), Err(Error::Io(_))));
            assert_eq!(decoder.format(), None);
            // The second call reads again, sees an empty stream, and decodes it
            // as an empty dictionary.
            assert_eq!(
                decoder.decode_value().unwrap(),
                Value::Dictionary(Dictionary::new())
            );
            assert_eq!(decoder.format(), Some(Format::OpenStep));
        }

        #[cfg(feature = "serde")]
        mod with_serde {
            use serde::Deserialize;

            use super::*;
            use crate::date::Date;

            #[test]
            fn format_is_recorded_before_the_mapping_fails() {
                // The document parses as XML, but a string cannot map onto `i64`.
                let mut decoder = Decoder::new(&b"<string>abc</string>"[..]);
                let result: Result<i64> = decoder.decode();
                assert!(result.is_err());
                assert_eq!(decoder.format(), Some(Format::Xml));
            }

            #[test]
            fn lax_decode_coerces_strings_for_openstep_only() {
                // Every field arrives as a text-format string and must be
                // coerced to its typed form.
                #[derive(Deserialize, Debug, PartialEq)]
                struct LaxTestData {
                    #[serde(rename = "I64")]
                    signed: i64,
                    #[serde(rename = "U64")]
                    unsigned: u64,
                    #[serde(rename = "F64")]
                    float: f64,
                    #[serde(rename = "B")]
                    flag: bool,
                    #[serde(rename = "D")]
                    date: Date,
                }
                let document = br#"{B=1;D="2013-11-27 00:34:00 +0000";I64=1;F64="3.0";U64=2;}"#;
                let mut decoder = Decoder::new(&document[..]);
                let parsed: LaxTestData = decoder.decode().unwrap();
                assert_eq!(decoder.format(), Some(Format::OpenStep));
                assert_eq!(
                    parsed,
                    LaxTestData {
                        signed: 1,
                        unsigned: 2,
                        float: 3.0,
                        flag: true,
                        date: Date::parse_text_layout("2013-11-27 00:34:00 +0000").unwrap(),
                    }
                );

                // The same coercion must not happen for an XML string.
                let strict: Result<i64> = from_slice(b"<string>1</string>");
                assert!(strict.is_err());

                // A non-numeric text string still fails to map onto an integer.
                let bad: Result<i64> = from_slice(b"abc");
                assert!(bad.is_err());
            }

            #[test]
            fn decode_value_and_decode_into_value_agree() {
                let documents: [&[u8]; 4] = [
                    b"<array><integer>1</integer></array>",
                    b"{a=b;}",
                    b"(<*R1.5>)",
                    HELLO_BPLIST,
                ];
                for document in documents {
                    // Mapping onto `Value` through serde must match the direct parse.
                    let direct = Decoder::new(document).decode_value().unwrap();
                    let mapped: Value = Decoder::new(document).decode().unwrap();
                    assert_eq!(direct, mapped);
                }
            }

            #[test]
            fn chunked_readers_still_detect_binary() {
                // Hands out at most `chunk` bytes per `read` call.
                struct ChunkedReader<'a> {
                    data: &'a [u8],
                    chunk: usize,
                }
                impl Read for ChunkedReader<'_> {
                    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
                        let take = self.chunk.min(self.data.len()).min(buf.len());
                        let (head, tail) = self.data.split_at(take);
                        buf.get_mut(..take)
                            .map(|slot| slot.copy_from_slice(head))
                            .ok_or_else(|| std::io::Error::other("buffer too small"))?;
                        self.data = tail;
                        Ok(take)
                    }
                }
                let document =
                    crate::ser::to_vec(&BTreeMap::from([("a", "b"), ("c", "d")]), Format::Binary)
                        .unwrap();
                // Chunk sizes smaller than the six-byte magic are included.
                for chunk in [1, 2, 3, 5] {
                    let mut decoder = Decoder::new(ChunkedReader {
                        data: &document,
                        chunk,
                    });
                    let map: BTreeMap<String, String> = decoder.decode().unwrap();
                    assert_eq!(decoder.format(), Some(Format::Binary), "chunk {chunk}");
                    assert_eq!(map.len(), 2);
                }
            }
        }
    }
}