Skip to main content

apple_plist/
de.rs

1//! [`Decoder`], the format-detection ladder, and the decode-side entry
2//! points.
3
4use std::fmt;
5use std::io::Read;
6
7#[cfg(feature = "serde")]
8use serde::de::DeserializeOwned;
9
10use crate::error::Result;
11use crate::format::Format;
12use crate::value::Value;
13
14/// Reads one property-list document from a reader, auto-detecting its
15/// [`Format`].
16///
17/// The first decode call buffers the reader to end of input; decoding works
18/// over that buffer, so the reader needs no `Seek` bound and repeated decode
19/// calls re-run detection over the same bytes, returning equal values for
20/// every format. Decode memory is proportional to the input size.
21///
22/// # Examples
23///
24/// ```
25/// use apple_plist::{Decoder, Format};
26///
27/// let mut decoder = Decoder::new(&b"(1,2,3)"[..]);
28/// assert_eq!(decoder.format(), None);
29/// let value = decoder.decode_value()?;
30/// assert_eq!(value.as_array().map(Vec::len), Some(3));
31/// assert_eq!(decoder.format(), Some(Format::OpenStep));
32/// # Ok::<(), apple_plist::Error>(())
33/// ```
34pub struct Decoder<R> {
35    reader: R,
36    buffer: Option<Vec<u8>>,
37    format: Option<Format>,
38}
39
40impl<R: Read> Decoder<R> {
41    /// Creates a decoder over `reader`; no I/O happens until the first
42    /// decode call.
43    pub const fn new(reader: R) -> Self {
44        Self {
45            reader,
46            buffer: None,
47            format: None,
48        }
49    }
50
51    /// The format detected by the most recent successful parse, or `None`
52    /// if no parse has succeeded yet.
53    ///
54    /// The format is recorded before the value maps into the target type,
55    /// so a failed [`decode`](Self::decode) whose document parsed still
56    /// reports it.
57    #[must_use]
58    pub const fn format(&self) -> Option<Format> {
59        self.format
60    }
61
62    /// Decodes the buffered document into a [`Value`] tree.
63    ///
64    /// # Errors
65    ///
66    /// Returns [`Error::Io`](crate::Error::Io) when buffering the reader
67    /// fails, and otherwise whatever the detection ladder reports:
68    /// [`Error::Parse`](crate::Error::Parse) for malformed documents,
69    /// [`Error::MaxDepthExceeded`](crate::Error::MaxDepthExceeded) for
70    /// hostile nesting, and
71    /// [`Error::InvalidPlist`](crate::Error::InvalidPlist) /
72    /// [`Error::FeatureDisabled`](crate::Error::FeatureDisabled) in builds
73    /// whose codec features are compiled out.
74    pub fn decode_value(&mut self) -> Result<Value> {
75        let (value, format) = parse_auto(self.buffered()?)?;
76        self.format = Some(format);
77        Ok(value)
78    }
79
80    /// Decodes the buffered document into any [`DeserializeOwned`] type.
81    ///
82    /// When detection reports [`Format::OpenStep`] — a format that can only
83    /// store strings — the mapping coerces strings into requested integers,
84    /// floats, booleans, and dates, the codec's lax mode.
85    ///
86    /// # Errors
87    ///
88    /// Everything [`decode_value`](Self::decode_value) can return, plus the
89    /// mapping failures of [`from_value`](crate::from_value).
90    ///
91    /// # Examples
92    ///
93    /// ```
94    /// use apple_plist::Decoder;
95    ///
96    /// let document = b"<?xml version=\"1.0\"?><plist><integer>42</integer></plist>";
97    /// let answer: i64 = Decoder::new(&document[..]).decode()?;
98    /// assert_eq!(answer, 42);
99    /// # Ok::<(), apple_plist::Error>(())
100    /// ```
101    #[cfg(feature = "serde")]
102    pub fn decode<T: DeserializeOwned>(&mut self) -> Result<T> {
103        let (value, format) = parse_auto(self.buffered()?)?;
104        self.format = Some(format);
105        let lax = format == Format::OpenStep;
106        T::deserialize(crate::value::de::ValueDeserializer::new(value, lax))
107    }
108
109    /// Buffers the reader to end of input once; later calls reuse the
110    /// buffer. A failed read keeps no partial buffer, so a subsequent call
111    /// retries the reader.
112    fn buffered(&mut self) -> Result<&[u8]> {
113        if self.buffer.is_none() {
114            let mut data = Vec::new();
115            let _ = self.reader.read_to_end(&mut data)?;
116            self.buffer = Some(data);
117        }
118        Ok(self.buffer.as_deref().unwrap_or_default())
119    }
120}
121
122impl<R> fmt::Debug for Decoder<R> {
123    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
124        f.debug_struct("Decoder")
125            .field("format", &self.format)
126            .finish_non_exhaustive()
127    }
128}
129
130/// The detection ladder: a 6-byte `bplist` magic commits to the binary
131/// parser; otherwise the XML parser runs, and only its
132/// [`Error::InvalidPlist`](crate::Error::InvalidPlist) verdict falls
133/// through to the text parser.
134pub(crate) fn parse_auto(bytes: &[u8]) -> Result<(Value, Format)> {
135    if bytes.starts_with(b"bplist") {
136        return binary_rung(bytes);
137    }
138    let retry = match xml_rung(bytes) {
139        Ok(value) => return Ok((value, Format::Xml)),
140        Err(error) if error.is_retry_signal() => error,
141        Err(error) => return Err(error),
142    };
143    text_rung(bytes, retry)
144}
145
146#[cfg(feature = "binary")]
147fn binary_rung(bytes: &[u8]) -> Result<(Value, Format)> {
148    crate::binary::parser::parse(bytes).map(|value| (value, Format::Binary))
149}
150
151#[cfg(not(feature = "binary"))]
152fn binary_rung(_bytes: &[u8]) -> Result<(Value, Format)> {
153    // The magic commits the ladder even when the codec is compiled out.
154    Err(crate::error::Error::invalid("binary"))
155}
156
157#[cfg(feature = "xml")]
158fn xml_rung(bytes: &[u8]) -> Result<Value> {
159    crate::xml::parser::parse(bytes)
160}
161
162#[cfg(not(feature = "xml"))]
163fn xml_rung(_bytes: &[u8]) -> Result<Value> {
164    // A compiled-out rung behaves as if its parser returned the retry signal.
165    Err(crate::error::Error::invalid("XML"))
166}
167
168#[cfg(feature = "openstep")]
169fn text_rung(bytes: &[u8], _xml_failure: crate::error::Error) -> Result<(Value, Format)> {
170    crate::text::parse(bytes)
171}
172
173#[cfg(not(feature = "openstep"))]
174fn text_rung(_bytes: &[u8], xml_failure: crate::error::Error) -> Result<(Value, Format)> {
175    if cfg!(feature = "xml") {
176        Err(xml_failure)
177    } else {
178        Err(crate::error::Error::FeatureDisabled {
179            format: Format::Xml,
180        })
181    }
182}
183
184/// Identifies the property-list format of `data`, or `None` when no enabled
185/// codec accepts it.
186///
187/// Runs the full detection ladder and discards the parsed value — format
188/// identification costs a complete parse, exactly like a decode. The answer
189/// therefore matches what [`Decoder::format`] would report after a
190/// successful decode of the same bytes, and depends on the codec features
191/// enabled in this build (enabling more features only turns `None` into
192/// `Some`).
193///
194/// # Examples
195///
196/// ```
197/// use apple_plist::{Format, detect};
198///
199/// assert_eq!(detect(b"<string>hi</string>"), Some(Format::Xml));
200/// assert_eq!(detect(b"(1,2,<*I3>)"), Some(Format::GnuStep));
201/// assert_eq!(detect(b"bplist00"), None);
202/// ```
203#[must_use]
204pub fn detect(data: &[u8]) -> Option<Format> {
205    parse_auto(data).ok().map(|(_, format)| format)
206}
207
/// Deserializes a property-list document from a byte slice, auto-detecting
/// its format.
///
/// Returns the value alone; use a [`Decoder`] when the detected format
/// matters. The slice is parsed in place, without first being copied into
/// an intermediate buffer.
///
/// # Errors
///
/// Everything [`Decoder::decode`] can return, except
/// [`Error::Io`](crate::Error::Io): no reader is involved.
///
/// # Examples
///
/// ```
/// let answer: i64 = apple_plist::from_slice(b"<integer>42</integer>")?;
/// assert_eq!(answer, 42);
/// # Ok::<(), apple_plist::Error>(())
/// ```
#[cfg(feature = "serde")]
pub fn from_slice<T: DeserializeOwned>(data: &[u8]) -> Result<T> {
    // The input is already in memory, so run the ladder over it directly
    // instead of letting a `Decoder` copy it with `read_to_end`.
    let (value, format) = parse_auto(data)?;
    // Same rule as `Decoder::decode`: OpenStep stores only strings, so its
    // values map in lax (string-coercing) mode.
    let lax = format == Format::OpenStep;
    T::deserialize(crate::value::de::ValueDeserializer::new(value, lax))
}
229
/// Deserializes a property-list document from a reader, auto-detecting its
/// format.
///
/// This is a one-shot [`Decoder`]: the reader is drained to end of input
/// before parsing, so decode memory is proportional to the input size.
///
/// # Errors
///
/// Everything [`Decoder::decode`] can return, including
/// [`Error::Io`](crate::Error::Io) when the reader fails.
///
/// # Examples
///
/// ```
/// let answer: bool = apple_plist::from_reader(&b"<true/>"[..])?;
/// assert!(answer);
/// # Ok::<(), apple_plist::Error>(())
/// ```
#[cfg(feature = "serde")]
pub fn from_reader<R: Read, T: DeserializeOwned>(reader: R) -> Result<T> {
    let mut decoder = Decoder::new(reader);
    decoder.decode()
}
252
#[cfg(test)]
mod tests {
    use super::*;

    /// `Debug` names the type and shows the still-unset format.
    #[test]
    fn debug_elides_the_reader() {
        let rendered = format!("{:?}", Decoder::new(&b""[..]));
        assert!(rendered.starts_with("Decoder"));
        assert!(rendered.contains("None"));
    }

    /// Ladder tests that need every codec compiled in.
    #[cfg(all(feature = "xml", feature = "binary", feature = "openstep"))]
    mod full_ladder {
        #![expect(clippy::unwrap_used, reason = "test code: unwrap is the assertion")]

        use std::collections::BTreeMap;

        use super::*;
        use crate::error::Error;
        use crate::value::{Dictionary, Integer};

        /// `String.binary.plist` from the golden corpus: the header, one
        /// ASCII string object (`Hello`), a one-entry offset table, and the
        /// 32-byte trailer.
        const HELLO_BPLIST: &[u8] = b"bplist00\
            UHello\
            \x08\
            \x00\x00\x00\x00\x00\x00\x01\x01\
            \x00\x00\x00\x00\x00\x00\x00\x01\
            \x00\x00\x00\x00\x00\x00\x00\x00\
            \x00\x00\x00\x00\x00\x00\x00\x0e";

        /// Decodes `data` with a fresh decoder and returns the outcome
        /// together with the format the decoder reports afterwards.
        fn decode_bytes(data: &[u8]) -> (Result<Value>, Option<Format>) {
            let mut decoder = Decoder::new(data);
            let outcome = decoder.decode_value();
            let detected = decoder.format();
            (outcome, detected)
        }

        /// Whether `outcome` is an [`Error::Parse`] attributed to `codec`.
        fn is_parse_failure(outcome: &Result<Value>, codec: &str) -> bool {
            matches!(outcome, Err(Error::Parse { format, .. }) if *format == codec)
        }

        #[test]
        fn format_detection_table_is_correct() {
            // Row 1: a well-formed binary document.
            let (outcome, detected) = decode_bytes(HELLO_BPLIST);
            assert_eq!(outcome.unwrap(), Value::String("Hello".into()));
            assert_eq!(detected, Some(Format::Binary));

            // Row 2: XML whose text merely looks like a GNUstep literal.
            let (outcome, detected) = decode_bytes(b"<string>&lt;*I3&gt;</string>");
            assert_eq!(outcome.unwrap(), Value::String("<*I3>".into()));
            assert_eq!(detected, Some(Format::Xml));

            // Row 3: binary magic with nothing behind it.
            let (outcome, detected) = decode_bytes(b"bplist00");
            assert!(is_parse_failure(&outcome, "binary"));
            assert_eq!(detected, None);

            // Row 4: an OpenStep array holds only strings.
            let (outcome, detected) = decode_bytes(b"(1,2,3,4,5)");
            let five_strings: Vec<Value> = ["1", "2", "3", "4", "5"]
                .iter()
                .map(|digit| Value::String((*digit).into()))
                .collect();
            assert_eq!(outcome.unwrap(), Value::Array(five_strings));
            assert_eq!(detected, Some(Format::OpenStep));

            // Row 5: OpenStep hex data.
            let (outcome, detected) = decode_bytes(b"<abab>");
            assert_eq!(outcome.unwrap(), Value::Data(vec![0xAB; 2]));
            assert_eq!(detected, Some(Format::OpenStep));

            // Row 6: one typed literal makes the document GNUstep.
            let (outcome, detected) = decode_bytes(b"(1,2,<*I3>)");
            let mixed = vec![
                Value::String("1".into()),
                Value::String("2".into()),
                Value::Integer(Integer::Signed(3)),
            ];
            assert_eq!(outcome.unwrap(), Value::Array(mixed));
            assert_eq!(detected, Some(Format::GnuStep));

            // Row 7: input no codec accepts ends in the text parser's error.
            let (outcome, detected) = decode_bytes(b"\x00");
            assert!(is_parse_failure(&outcome, "text"));
            assert_eq!(detected, None);
        }

        #[test]
        fn detect_agrees_with_the_ladder() {
            let table: [(&[u8], Option<Format>); 8] = [
                (HELLO_BPLIST, Some(Format::Binary)),
                (b"<string>&lt;*I3&gt;</string>", Some(Format::Xml)),
                (b"bplist00", None),
                (b"(1,2,3,4,5)", Some(Format::OpenStep)),
                (b"<abab>", Some(Format::OpenStep)),
                (b"(1,2,<*I3>)", Some(Format::GnuStep)),
                (b"\x00", None),
                (b"", Some(Format::OpenStep)),
            ];
            for (input, expected) in table {
                assert_eq!(detect(input), expected);
            }
        }

        #[test]
        fn empty_whitespace_and_comment_only_input_is_an_empty_dictionary() {
            let blanks: [&[u8]; 4] = [b"", b" \n\t", b"// hi", b"/* hi */"];
            for blank in blanks {
                let (outcome, detected) = decode_bytes(blank);
                assert_eq!(outcome.unwrap(), Value::Dictionary(Dictionary::new()));
                assert_eq!(detected, Some(Format::OpenStep));
            }
        }

        #[test]
        fn short_non_magic_prefixes_never_sniff_as_binary() {
            // One byte short of the magic: an ordinary OpenStep string.
            let (outcome, detected) = decode_bytes(b"bplis");
            assert_eq!(outcome.unwrap(), Value::String("bplis".into()));
            assert_eq!(detected, Some(Format::OpenStep));

            // The magic alone commits, then fails inside the binary parser.
            let (outcome, detected) = decode_bytes(b"bplist");
            assert!(is_parse_failure(&outcome, "binary"));
            assert_eq!(detected, None);

            // It commits even when the rest would be a valid OpenStep
            // document.
            let (outcome, _) = decode_bytes(b"bplistish = x;");
            assert!(is_parse_failure(&outcome, "binary"));
        }

        #[test]
        fn xml_hard_errors_do_not_retry_as_text() {
            let (outcome, detected) = decode_bytes(b"<plist/>");
            assert!(is_parse_failure(&outcome, "XML"));
            assert_eq!(detected, None);

            let (outcome, _) = decode_bytes(b"<plist>");
            assert!(is_parse_failure(&outcome, "XML"));
        }

        #[test]
        fn xml_depth_overrun_is_fatal_without_text_retry() {
            let doc = b"<array>".repeat(200);
            let (outcome, detected) = decode_bytes(&doc);
            assert!(matches!(outcome, Err(Error::MaxDepthExceeded)));
            assert_eq!(detected, None);
        }

        #[test]
        fn when_xml_retries_and_text_fails_the_text_error_surfaces() {
            let (outcome, detected) = decode_bytes(b"{ a = ");
            assert!(is_parse_failure(&outcome, "text"));
            assert_eq!(detected, None);
        }

        #[test]
        fn bom_matrix_follows_the_ladder() {
            // BOM + XML.
            let (outcome, detected) = decode_bytes(b"\xEF\xBB\xBF<string>x</string>");
            assert_eq!(outcome.unwrap(), Value::String("x".into()));
            assert_eq!(detected, Some(Format::Xml));

            // BOM + OpenStep.
            let (outcome, detected) = decode_bytes(b"\xEF\xBB\xBF{a=b;}");
            let expected = Dictionary::from([("a".to_owned(), Value::String("b".into()))]);
            assert_eq!(outcome.unwrap(), Value::Dictionary(expected));
            assert_eq!(detected, Some(Format::OpenStep));

            // BOM + binary: the magic is no longer at offset zero.
            let bom_bplist = [&b"\xEF\xBB\xBF"[..], HELLO_BPLIST].concat();
            assert_ne!(detect(&bom_bplist), Some(Format::Binary));
        }

        #[test]
        fn repeated_decodes_are_idempotent_for_every_format() {
            let documents: [&[u8]; 4] =
                [HELLO_BPLIST, b"{a=b;}", b"(<*I1>)", b"<string>x</string>"];
            for document in documents {
                let mut decoder = Decoder::new(document);
                let initial = decoder.decode_value().unwrap();
                let initial_format = decoder.format();
                let repeat = decoder.decode_value().unwrap();
                assert_eq!(initial, repeat);
                assert_eq!(decoder.format(), initial_format);
            }
        }

        #[test]
        fn parse_failures_leave_the_previous_format_in_place() {
            let mut decoder = Decoder::new(&b"bplist00"[..]);
            for _ in 0..2 {
                assert!(decoder.decode_value().is_err());
                assert_eq!(decoder.format(), None);
            }
        }

        #[test]
        fn io_failures_surface_and_a_later_call_retries_the_reader() {
            /// Fails its first read, then reports end of input.
            struct FlakyReader {
                attempts: usize,
            }
            impl Read for FlakyReader {
                fn read(&mut self, _buf: &mut [u8]) -> std::io::Result<usize> {
                    self.attempts += 1;
                    match self.attempts {
                        1 => Err(std::io::Error::other("transient")),
                        _ => Ok(0),
                    }
                }
            }

            let mut decoder = Decoder::new(FlakyReader { attempts: 0 });
            let failed = decoder.decode_value();
            assert!(matches!(failed, Err(Error::Io(_))));
            assert_eq!(decoder.format(), None);

            // The retry reaches end of input: an empty OpenStep dictionary.
            let retried = decoder.decode_value().unwrap();
            assert_eq!(retried, Value::Dictionary(Dictionary::new()));
            assert_eq!(decoder.format(), Some(Format::OpenStep));
        }

        /// Typed-decode tests.
        #[cfg(feature = "serde")]
        mod with_serde {
            use serde::Deserialize;

            use super::*;
            use crate::date::Date;

            #[test]
            fn format_is_recorded_before_the_mapping_fails() {
                let mut decoder = Decoder::new(&b"<string>abc</string>"[..]);
                let mapped: Result<i64> = decoder.decode();
                assert!(mapped.is_err());
                assert_eq!(decoder.format(), Some(Format::Xml));
            }

            #[test]
            fn lax_decode_coerces_strings_for_openstep_only() {
                // Every field arrives as an OpenStep string and must be
                // coerced through the public ladder.
                #[derive(Deserialize, Debug, PartialEq)]
                struct LaxTestData {
                    #[serde(rename = "I64")]
                    signed: i64,
                    #[serde(rename = "U64")]
                    unsigned: u64,
                    #[serde(rename = "F64")]
                    float: f64,
                    #[serde(rename = "B")]
                    flag: bool,
                    #[serde(rename = "D")]
                    date: Date,
                }

                let document = br#"{B=1;D="2013-11-27 00:34:00 +0000";I64=1;F64="3.0";U64=2;}"#;
                let mut decoder = Decoder::new(&document[..]);
                let parsed: LaxTestData = decoder.decode().unwrap();
                assert_eq!(decoder.format(), Some(Format::OpenStep));

                let expected = LaxTestData {
                    signed: 1,
                    unsigned: 2,
                    float: 3.0,
                    flag: true,
                    date: Date::parse_text_layout("2013-11-27 00:34:00 +0000").unwrap(),
                };
                assert_eq!(parsed, expected);

                // A strict (XML) document gets no such coercion.
                let strict: Result<i64> = from_slice(b"<string>1</string>");
                assert!(strict.is_err());

                // A string that cannot be coerced still fails in lax mode.
                let bad: Result<i64> = from_slice(b"abc");
                assert!(bad.is_err());
            }

            #[test]
            fn decode_value_and_decode_into_value_agree() {
                let documents: [&[u8]; 4] = [
                    b"<array><integer>1</integer></array>",
                    b"{a=b;}",
                    b"(<*R1.5>)",
                    HELLO_BPLIST,
                ];
                for document in documents {
                    let direct = Decoder::new(document).decode_value().unwrap();
                    let mapped: Value = Decoder::new(document).decode().unwrap();
                    assert_eq!(direct, mapped);
                }
            }

            #[test]
            fn chunked_readers_still_detect_binary() {
                /// Hands out at most `chunk` bytes per `read` call.
                struct ChunkedReader<'a> {
                    data: &'a [u8],
                    chunk: usize,
                }
                impl Read for ChunkedReader<'_> {
                    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
                        let take = buf.len().min(self.chunk).min(self.data.len());
                        let Some(slot) = buf.get_mut(..take) else {
                            return Err(std::io::Error::other("buffer too small"));
                        };
                        let (head, tail) = self.data.split_at(take);
                        slot.copy_from_slice(head);
                        self.data = tail;
                        Ok(take)
                    }
                }

                let pairs = BTreeMap::from([("a", "b"), ("c", "d")]);
                let document = crate::ser::to_vec(&pairs, Format::Binary).unwrap();
                for chunk in [1, 2, 3, 5] {
                    let reader = ChunkedReader {
                        data: &document,
                        chunk,
                    };
                    let mut decoder = Decoder::new(reader);
                    let map: BTreeMap<String, String> = decoder.decode().unwrap();
                    assert_eq!(decoder.format(), Some(Format::Binary), "chunk {chunk}");
                    assert_eq!(map.len(), 2);
                }
            }
        }
    }
}