openstep_plist/
lib.rs

1use std::{borrow::Cow, collections::BTreeMap, fmt::Debug};
2
3use serde::{Deserialize, Serialize};
4use smol_str::SmolStr;
5
6pub mod de;
7pub mod error;
8pub mod ser;
9
10pub use crate::error::Error;
11
/// A plist dictionary: a [`BTreeMap`] from [`SmolStr`] keys to [`Plist`] values.
pub type Dictionary = BTreeMap<SmolStr, Plist>;
14
/// An array of plist values, stored as a [`Vec`] of [`Plist`].
pub type Array = Vec<Plist>;
17
/// An enum representing a property list.
///
/// The enum is `#[serde(untagged)]`, so values are (de)serialized without a
/// variant tag.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum Plist {
    /// A map from string keys to nested plist values.
    Dictionary(Dictionary),
    /// A sequence of nested plist values.
    Array(Array),
    /// A text value.
    String(String),
    /// A signed 64-bit integer.
    Integer(i64),
    /// A 64-bit floating point number.
    Float(f64),
    /// Raw bytes; (de)serialized through `serde_bytes`.
    #[serde(with = "serde_bytes")]
    Data(Vec<u8>),
}
30
31impl PartialEq for Plist {
32    fn eq(&self, other: &Self) -> bool {
33        match (self, other) {
34            (Plist::Dictionary(a), Plist::Dictionary(b)) => a == b,
35            (Plist::Array(a), Plist::Array(b)) => a == b,
36            (Plist::String(a), Plist::String(b)) => a == b,
37            (Plist::Integer(a), Plist::Integer(b)) => a == b,
38            (Plist::Float(a), Plist::Float(b)) => (a * 1000.0).round() == (b * 1000.0).round(),
39            (Plist::Data(a), Plist::Data(b)) => a == b,
40            _ => false,
41        }
42    }
43}
44
/// A lexical token produced by `Token::lex`.
#[derive(Debug)]
pub(crate) enum Token<'a> {
    /// End of input: only whitespace (or nothing) was left.
    Eof,
    /// A `{`, which opens a dictionary.
    OpenBrace,
    /// A `(`, which opens an array.
    OpenParen,
    /// Bytes decoded from a `<...>` hexadecimal literal.
    Data(Vec<u8>),
    /// A double-quoted string with escapes resolved; borrowed from the input
    /// when no escape had to be rewritten.
    String(Cow<'a, str>),
    /// An unquoted run of `is_alnum` bytes, borrowed from the input.
    Atom(&'a str),
}
54
/// Returns `true` for bytes that can appear in a number: ASCII digits, `.` and `-`.
fn is_numeric(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'.' | b'-')
}
58
59fn is_alnum(b: u8) -> bool {
60    // https://github.com/opensource-apple/CF/blob/3cc41a76b1491f50813e28a4ec09954ffa359e6f/CFOldStylePList.c#L79
61    is_numeric(b)
62        || b.is_ascii_uppercase()
63        || b.is_ascii_lowercase()
64        || b == b'_'
65        || b == b'$'
66        || b == b'/'
67        || b == b':'
68        || b == b'.'
69        || b == b'-'
70}
71
72// Used for serialization; make sure UUID's get quoted
73fn is_alnum_strict(b: u8) -> bool {
74    is_alnum(b) && b != b'-'
75}
76
/// Returns `true` for ASCII digits and the upper-case hex letters `A`..=`F`.
fn is_hex_upper(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'A'..=b'F')
}
80
/// Returns `true` for space, tab, carriage return and line feed.
fn is_ascii_whitespace(b: u8) -> bool {
    matches!(b, b' ' | b'\t' | b'\r' | b'\n')
}
84
85fn numeric_ok(s: &str) -> bool {
86    let s = s.as_bytes();
87    if s.is_empty() {
88        return false;
89    }
90    let s = if s.len() > 1 && (*s.first().unwrap(), *s.last().unwrap()) == (b'"', b'"') {
91        &s[1..s.len()]
92    } else {
93        s
94    };
95    if s.iter().all(|&b| is_hex_upper(b)) && !s.iter().all(|&b| b.is_ascii_digit()) {
96        return false;
97    }
98    if s.len() > 1 && s[0] == b'0' {
99        return !s.iter().all(|&b| b.is_ascii_digit());
100    }
101    // Prevent parsing of "infinity", "inf", "nan" as numbers, we
102    // want to keep them as strings (e.g. glyphname)
103    // https://doc.rust-lang.org/std/primitive.f64.html#grammar
104    if s.eq_ignore_ascii_case(b"infinity")
105        || s.eq_ignore_ascii_case(b"inf")
106        || s.eq_ignore_ascii_case(b"nan")
107    {
108        return false;
109    }
110    true
111}
112
113fn skip_ws(s: &str, mut ix: usize) -> usize {
114    while ix < s.len() && is_ascii_whitespace(s.as_bytes()[ix]) {
115        ix += 1;
116    }
117    ix
118}
119
120impl Plist {
121    pub fn parse(s: &str) -> Result<Plist, Error> {
122        let (plist, _ix) = Plist::parse_rec(s, 0)?;
123        // TODO: check that we're actually at eof
124        Ok(plist)
125    }
126
127    fn name(&self) -> &'static str {
128        match self {
129            Plist::Array(..) => "array",
130            Plist::Dictionary(..) => "dictionary",
131            Plist::Float(..) => "float",
132            Plist::Integer(..) => "integer",
133            Plist::String(..) => "string",
134            Plist::Data(..) => "data",
135        }
136    }
137
138    pub fn get(&self, key: &str) -> Option<&Plist> {
139        match self {
140            Plist::Dictionary(d) => d.get(key),
141            _ => None,
142        }
143    }
144
145    pub fn as_dict(&self) -> Option<&BTreeMap<SmolStr, Plist>> {
146        match self {
147            Plist::Dictionary(d) => Some(d),
148            _ => None,
149        }
150    }
151
152    pub fn as_array(&self) -> Option<&[Plist]> {
153        match self {
154            Plist::Array(a) => Some(a),
155            _ => None,
156        }
157    }
158
159    pub fn as_str(&self) -> Option<&str> {
160        match self {
161            Plist::String(s) => Some(s),
162            _ => None,
163        }
164    }
165
166    pub fn as_i64(&self) -> Option<i64> {
167        match self {
168            Plist::Integer(i) => Some(*i),
169            _ => None,
170        }
171    }
172
173    pub fn as_f64(&self) -> Option<f64> {
174        match self {
175            Plist::Integer(i) => Some(*i as f64),
176            Plist::Float(f) => Some(*f),
177            _ => None,
178        }
179    }
180
181    pub fn flatten_to_integer(&self) -> Plist {
182        match self {
183            Plist::Float(f) => {
184                if f.fract() == 0.0 {
185                    Plist::Integer(*f as i64)
186                } else {
187                    Plist::Float(*f)
188                }
189            }
190            Plist::String(s) => {
191                if let Ok(num) = s.parse() {
192                    Plist::Integer(num)
193                } else {
194                    self.clone()
195                }
196            }
197            _ => self.clone(),
198        }
199    }
200    pub fn flatten_to_string(&self) -> Plist {
201        match self {
202            Plist::Integer(i) => Plist::String(i.to_string()),
203            Plist::Float(f) => {
204                if f.fract() == 0.0 {
205                    Plist::String((*f as i64).to_string())
206                } else {
207                    Plist::String(f.to_string())
208                }
209            }
210            _ => self.clone(),
211        }
212    }
213
214    pub fn expect_dict(self) -> Result<Dictionary, Error> {
215        match self {
216            Plist::Dictionary(dict) => Ok(dict),
217            _other => Err(Error::UnexpectedDataType {
218                expected: "dictionary",
219                found: _other.name(),
220            }),
221        }
222    }
223
224    pub fn expect_array(self) -> Result<Array, Error> {
225        match self {
226            Plist::Array(array) => Ok(array),
227            _other => Err(Error::UnexpectedDataType {
228                expected: "array",
229                found: _other.name(),
230            }),
231        }
232    }
233
234    pub fn expect_string(self) -> Result<String, Error> {
235        match self {
236            Plist::String(string) => Ok(string),
237            _other => Err(Error::UnexpectedDataType {
238                expected: "string",
239                found: _other.name(),
240            }),
241        }
242    }
243
244    pub fn expect_data(self) -> Result<Vec<u8>, Error> {
245        match self {
246            Plist::Data(bytes) => Ok(bytes),
247            _other => Err(Error::UnexpectedDataType {
248                expected: "data",
249                found: _other.name(),
250            }),
251        }
252    }
253
254    fn parse_rec(s: &str, ix: usize) -> Result<(Plist, usize), Error> {
255        let (tok, mut ix) = Token::lex(s, ix)?;
256        match tok {
257            Token::Atom(s) => Ok((Plist::parse_atom(s), ix)),
258            Token::String(s) => Ok((Plist::String(s.into()), ix)),
259            Token::Data(bytes) => Ok((Plist::Data(bytes), ix)),
260            Token::OpenBrace => {
261                let mut dict = BTreeMap::new();
262                loop {
263                    if let Some(ix) = Token::expect(s, ix, b'}') {
264                        return Ok((Plist::Dictionary(dict), ix));
265                    }
266                    let (key, next) = Token::lex(s, ix)?;
267                    let key_str = Token::try_into_smolstr(key)?;
268                    let next = Token::expect(s, next, b'=');
269                    if next.is_none() {
270                        return Err(Error::ExpectedEquals);
271                    }
272                    let (val, next) = Self::parse_rec(s, next.unwrap())?;
273                    dict.insert(key_str, val);
274                    if let Some(next) = Token::expect(s, next, b';') {
275                        ix = next;
276                    } else {
277                        return Err(Error::ExpectedSemicolon);
278                    }
279                }
280            }
281            Token::OpenParen => {
282                let mut list = Vec::new();
283                loop {
284                    if let Some(ix) = Token::expect(s, ix, b')') {
285                        return Ok((Plist::Array(list), ix));
286                    }
287                    let (val, next) = Self::parse_rec(s, ix)?;
288                    list.push(val);
289                    if let Some(ix) = Token::expect(s, next, b')') {
290                        return Ok((Plist::Array(list), ix));
291                    }
292                    if let Some(next) = Token::expect(s, next, b',') {
293                        ix = next;
294                        if let Some(next) = Token::expect(s, next, b')') {
295                            return Ok((Plist::Array(list), next));
296                        }
297                    } else {
298                        return Err(Error::ExpectedComma);
299                    }
300                }
301            }
302            _ => Err(Error::UnexpectedToken { name: tok.name() }),
303        }
304    }
305
306    fn parse_atom(s: &str) -> Plist {
307        if numeric_ok(s) {
308            if let Ok(num) = s.parse() {
309                return Plist::Integer(num);
310            }
311            if let Ok(num) = s.parse() {
312                return Plist::Float(num);
313            }
314        }
315        Plist::String(s.into())
316    }
317
318    #[allow(clippy::inherent_to_string, unused)]
319    pub fn to_string(&self) -> String {
320        crate::ser::to_string(&self).unwrap()
321    }
322
323    pub fn is_meaningful(&self) -> bool {
324        match self {
325            Plist::Array(a) => !a.is_empty(),
326            Plist::Dictionary(d) => !d.is_empty(),
327            Plist::String(s) => !s.is_empty(),
328            Plist::Integer(i) => *i != 0,
329            Plist::Float(f) => *f != 0.0,
330            Plist::Data(d) => !d.is_empty(),
331        }
332    }
333}
334
335impl Default for Plist {
336    fn default() -> Self {
337        // kind of arbitrary but seems okay
338        Plist::Array(Vec::new())
339    }
340}
341
342fn byte_from_hex(hex: [u8; 2]) -> Result<u8, Error> {
343    fn hex_digit_to_byte(digit: u8) -> Result<u8, Error> {
344        match digit {
345            b'0'..=b'9' => Ok(digit - b'0'),
346            b'a'..=b'f' => Ok(digit - b'a' + 10),
347            b'A'..=b'F' => Ok(digit - b'A' + 10),
348            _ => Err(Error::BadData),
349        }
350    }
351    let maj = hex_digit_to_byte(hex[0])? << 4;
352    let min = hex_digit_to_byte(hex[1])?;
353    Ok(maj | min)
354}
355
356impl<'a> Token<'a> {
357    fn lex(s: &'a str, ix: usize) -> Result<(Token<'a>, usize), Error> {
358        let start = skip_ws(s, ix);
359        if start == s.len() {
360            return Ok((Token::Eof, start));
361        }
362        let b = s.as_bytes()[start];
363        match b {
364            b'{' => Ok((Token::OpenBrace, start + 1)),
365            b'(' => Ok((Token::OpenParen, start + 1)),
366            b'<' => {
367                let data_start = start + 1;
368                let data_end = data_start
369                    + s.as_bytes()[data_start..]
370                        .iter()
371                        .position(|b| *b == b'>')
372                        .ok_or(Error::UnclosedData)?;
373                let chunks = s.as_bytes()[data_start..data_end].chunks_exact(2);
374                if !chunks.remainder().is_empty() {
375                    return Err(Error::BadData);
376                }
377                let data = chunks
378                    .map(|x| byte_from_hex(x.try_into().unwrap()))
379                    .collect::<Result<_, _>>()?;
380                Ok((Token::Data(data), data_end + 1))
381            }
382            b'"' => {
383                let mut ix = start + 1;
384                let mut cow_start = ix;
385                let mut buf = String::new();
386                while ix < s.len() {
387                    let b = s.as_bytes()[ix];
388                    match b {
389                        b'"' => {
390                            // End of string
391                            let string = if buf.is_empty() {
392                                s[cow_start..ix].into()
393                            } else {
394                                buf.push_str(&s[cow_start..ix]);
395                                buf.into()
396                            };
397                            return Ok((Token::String(string), ix + 1));
398                        }
399                        b'\\' => {
400                            buf.push_str(&s[cow_start..ix]);
401                            if ix + 1 == s.len() {
402                                return Err(Error::UnclosedString);
403                            }
404                            let (c, len) = parse_escape(&s[ix..])?;
405                            buf.push(c);
406                            ix += len;
407                            cow_start = ix;
408                        }
409                        _ => ix += 1,
410                    }
411                }
412                Err(Error::UnclosedString)
413            }
414            _ => {
415                if is_alnum(b) {
416                    let mut ix = start + 1;
417                    while ix < s.len() {
418                        if !is_alnum(s.as_bytes()[ix]) {
419                            break;
420                        }
421                        ix += 1;
422                    }
423                    Ok((Token::Atom(&s[start..ix]), ix))
424                } else {
425                    Err(Error::UnexpectedChar(s[start..].chars().next().unwrap()))
426                }
427            }
428        }
429    }
430
431    fn try_into_smolstr(self) -> Result<SmolStr, Error> {
432        match self {
433            Token::Atom(s) => Ok(s.into()),
434            Token::String(s) => Ok(s.into()),
435            _ => Err(Error::NotAString {
436                token_name: self.name(),
437            }),
438        }
439    }
440
441    fn expect(s: &str, ix: usize, delim: u8) -> Option<usize> {
442        let ix = skip_ws(s, ix);
443        if ix < s.len() {
444            let b = s.as_bytes()[ix];
445            if b == delim {
446                return Some(ix + 1);
447            }
448        }
449        None
450    }
451
452    pub(crate) fn name(&self) -> &'static str {
453        match self {
454            Token::Atom(..) => "Atom",
455            Token::String(..) => "String",
456            Token::Eof => "Eof",
457            Token::OpenBrace => "OpenBrace",
458            Token::OpenParen => "OpenParen",
459            Token::Data(_) => "Data",
460        }
461    }
462}
463
464fn parse_escape(s: &str) -> Result<(char, usize), Error> {
465    // checked before this is called
466    assert!(s.starts_with('\\') && s.len() > 1);
467
468    let mut ix = 1;
469    let b = s.as_bytes()[ix];
470    match b {
471        b'"' | b'\\' => Ok((b as _, 2)),
472        b'n' => Ok(('\n', 2)),
473        b'r' => Ok(('\r', 2)),
474        b't' => Ok(('\t', 2)),
475        // unicode escapes
476        b'U' if s.len() >= 3 => {
477            // here we will parse up to 4 hexdigits:
478            // https://github.com/opensource-apple/CF/blob/3cc41a76b1491f5/CFOldStylePList.c#L150C2-L150C6
479            ix += 1;
480            let (val, len) = parse_hex_digit(&s.as_bytes()[ix..])?;
481            ix += len;
482            let result = if !is_surrogate(val) || !s.as_bytes()[ix..].starts_with(b"\\U") {
483                // we can't cast! this is a utf-16 value, not a codepoint
484                char::decode_utf16([val]).next()
485            } else {
486                ix += 2;
487                let (val2, len) = parse_hex_digit(&s.as_bytes()[ix..])?;
488                ix += len;
489                char::decode_utf16([val, val2]).next()
490            };
491            result
492                .transpose()
493                .ok()
494                .flatten()
495                .ok_or_else(|| Error::InvalidUnicodeEscape(s[..ix].to_string()))
496                .map(|c| (c, ix))
497        }
498        b'0'..=b'3' if s.len() >= 4 => {
499            // octal escape
500            let b1 = s.as_bytes()[ix + 1];
501            let b2 = s.as_bytes()[ix + 2];
502            if (b'0'..=b'7').contains(&b1) && (b'0'..=b'7').contains(&b2) {
503                let oct = (b - b'0') * 64 + (b1 - b'0') * 8 + (b2 - b'0');
504                ix += 3;
505                Ok((oct as _, ix))
506            } else {
507                Err(Error::UnknownEscape)
508            }
509        }
510        _ => Err(Error::UnknownEscape),
511    }
512}
513
/// Returns `true` if `val` is a UTF-16 surrogate code unit (`0xD800..=0xDFFF`).
fn is_surrogate(val: u16) -> bool {
    (0xD800..=0xDFFF).contains(&val)
}
517
518// parse up to four hex digits as a u16
519// returns an error if the first byte is not a valid ascii hex digit,
520// then will read up to four bytes
521fn parse_hex_digit(bytes: &[u8]) -> Result<(u16, usize), Error> {
522    match bytes {
523        &[] => Err(Error::UnknownEscape),
524        &[one, ..] if !one.is_ascii_hexdigit() => Err(Error::UnknownEscape),
525        other => Ok(other
526            .iter()
527            .take(4)
528            .map_while(|b| (*b as char).to_digit(16).map(|x| x as u16))
529            .fold((0u16, 0usize), |(num, len), hexval| {
530                ((num << 4) + hexval, len + 1)
531            })),
532    }
533}
534
535impl From<bool> for Plist {
536    fn from(x: bool) -> Plist {
537        Plist::Integer(x as i64)
538    }
539}
540
541impl From<String> for Plist {
542    fn from(x: String) -> Plist {
543        Plist::String(x)
544    }
545}
546
547impl From<SmolStr> for Plist {
548    fn from(x: SmolStr) -> Plist {
549        Plist::String(x.into())
550    }
551}
552
553impl From<i64> for Plist {
554    fn from(x: i64) -> Plist {
555        Plist::Integer(x)
556    }
557}
558
559impl From<f64> for Plist {
560    fn from(x: f64) -> Plist {
561        Plist::Float(x)
562    }
563}
564
565impl From<Dictionary> for Plist {
566    fn from(x: Dictionary) -> Plist {
567        Plist::Dictionary(x)
568    }
569}
570
571impl<T> From<Vec<T>> for Plist
572where
573    T: Into<Plist>,
574{
575    fn from(x: Vec<T>) -> Plist {
576        Plist::Array(x.into_iter().map(Into::into).collect())
577    }
578}
579
// Unit tests for the parser, the hex helper and the typed accessors.
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use super::*;

    // Unquoted values made of path and punctuation characters (`.`, `/`, `_`,
    // `$`, `:`, `-`) must parse as strings.
    #[test]
    fn parse_unquoted_strings() {
        let contents = r#"
        {
            name = "UFO Filename";
            value1 = ../../build/instance_ufos/Testing_Rg.ufo;
            value2 = _;
            value3 = $;
            value4 = /;
            value5 = :;
            value6 = .;
            value7 = -;
        }
        "#;

        let plist = Plist::parse(contents).unwrap();
        let plist_expected = Plist::Dictionary(BTreeMap::from_iter([
            ("name".into(), Plist::String("UFO Filename".into())),
            (
                "value1".into(),
                Plist::String("../../build/instance_ufos/Testing_Rg.ufo".into()),
            ),
            ("value2".into(), Plist::String("_".into())),
            ("value3".into(), Plist::String("$".into())),
            ("value4".into(), Plist::String("/".into())),
            ("value5".into(), Plist::String(":".into())),
            ("value6".into(), Plist::String(".".into())),
            ("value7".into(), Plist::String("-".into())),
        ]));
        assert_eq!(plist, plist_expected);
    }

    // A `<...>` hex literal must decode to the bytes it spells.
    #[test]
    fn parse_binary_data() {
        let contents = r#"
        {
            mydata = <deadbeef>;
        }
            "#;
        let plist = Plist::parse(contents).unwrap();
        let data = plist.get("mydata").unwrap().clone().expect_data().unwrap();
        assert_eq!(data, [0xde, 0xad, 0xbe, 0xef]);
    }

    // Deserializing a `Plist` from a `Plist::Data` value must succeed; only
    // success is checked, not the resulting value.
    #[test]
    fn test_serde_binary_data() {
        // The parsing variant of this test is currently disabled:
        // let contents = r#"
        // {
        //     de.kutilek.scrawl.data = <89504e470d0a1a0a>;
        // }
        // "#;
        // let plist = Plist::parse(contents).unwrap();
        // println!("{:?}", plist);
        let plist = Plist::Data(vec![0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
        let mut deserializer = crate::de::Deserializer::from_plist(&plist);
        Plist::deserialize(&mut deserializer).unwrap();
    }

    // `byte_from_hex` combines two ASCII hex digits into one byte, high digit first.
    #[test]
    fn ascii_to_hex() {
        assert_eq!(byte_from_hex([b'0', b'1']), Ok(0x01));
        assert_eq!(byte_from_hex([b'0', b'0']), Ok(0x00));
        assert_eq!(byte_from_hex([b'f', b'f']), Ok(0xff));
        assert_eq!(byte_from_hex([b'f', b'0']), Ok(0xf0));
        assert_eq!(byte_from_hex([b'0', b'f']), Ok(0x0f));
    }

    // Nested dictionaries and arrays parse into the expected variants, and the
    // typed accessors return their contents.
    #[test]
    fn parse_to_plist_type() {
        let plist_str = r#"
        {
            name = "meta";
            value = (
                {
                    data = latn;
                    tag = dlng;
                    num = 5;
                },
                {
                    data = "latn,cyrl";
                    tag = slng;
                    num = -3.0;
                }
            );
        }"#;

        let plist = Plist::parse(plist_str).unwrap();
        let root = plist.expect_dict().unwrap();
        assert_eq!(root.get("name").unwrap().as_str(), Some("meta"));
        let value = root.get("value").unwrap().as_array().unwrap();
        assert_eq!(value.len(), 2);
        let first = value[0].as_dict().unwrap();
        assert_eq!(first.get("data").and_then(Plist::as_str), Some("latn"));
        assert_eq!(first.get("tag").and_then(Plist::as_str), Some("dlng"));
        assert_eq!(first.get("num").and_then(Plist::as_i64), Some(5));
        let second = value[1].as_dict().unwrap();
        assert_eq!(
            second.get("data").and_then(Plist::as_str),
            Some("latn,cyrl")
        );
        assert_eq!(second.get("tag").and_then(Plist::as_str), Some("slng"));
        // "-3.0" is parsed as a float, so it is read back through `as_f64`.
        assert_eq!(second.get("num").and_then(Plist::as_f64), Some(-3.0));
    }
}
689}